**********************************************************************************************
* This is a master create file, which creates all the data files based on the raw data input *
**********************************************************************************************
/*


** Melvin
global data "D:\Users\wong\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "D:\Users\wong\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "D:\Users\wong\Dropbox\Geocoded Aid and Conflict\Raw Data"

**Kai
global data "C:\Users\gehring\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "C:\Users\gehring\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "C:\Users\gehring\Dropbox\Geocoded Aid and Conflict\Raw Data"

** Lennart
global data "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\Data"
global dofiles "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\do-files"
global rawdata "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\Raw Data"


*/
global cwd "$data\Aid\2017_11_14_WB"
cd "$cwd"
********************************************************************************
*** Define program for aid allocation using location weights
********************************************************************************
capture program drop allocAid
program define allocAid
	qui{
	***HELP FILE***
	/*
	First argument is reserved for name of donor
	Second argument is reserved for variable name of aid variable
	
	Required datasets:
		- `1'_disbursement.dta
		- gadm2.dta
		
	Variables needed in "`1'_disbursement.dta" to run program:
	- precision_N100
	- transaction_value_tot
	- temp_totcoded
	- Disbursementcount
	- transaction_year
	- ID_adm1
	- ID_adm2
	- ISO3  
	- ADM0 
	- ADM1 
	- ADM2
	- ID_1
	- ID_2
	- ID_0
	- d_miss_ADM2
	*/
	
	** Get stata package
	capture ssc install fs, replace
	******************************************************************************************
	
	*****Generate location weighted data with precision code 1-3 (ADM2 information) for ADM2 level****
	noisily di "1) Open `1' aid dataset"
	use "`1'_disbursement.dta", clear	 
	keep if precision_N100<4
	
	*XXXXXX Melvin 31.01.2018: Rename variable to avoid merger error
	rename Disbursementcount `2'_disbcount

	noisily di "2) Allocate aid according to the number of locations and the general share of precison-123-locations per project for ADM2 level"
	* For instance 100 [transaction_value_tot]/8 (number of projects with precision1-4)
	gen transaction_value_loc=transaction_value_tot/temp_totcoded
	replace `2'_disbcount=`2'_disbcount/temp_totcoded
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			gen transaction_value_loc_`g'=transaction_value_tot_`g'/temp_totcoded
			rename Disbursementcount_`g' `2'_disbcount_`g'
			replace `2'_disbcount_`g'=`2'_disbcount_`g'/temp_totcoded
		}
	}
	* Collapse on ADM2 level to aggregate all project disbursements per ADM2 region"
	collapse (sum) transaction_value_loc* `2'_disbcount*, by(transaction_year ISO3  ADM0 ADM1 ADM2 ID_0 ID_1 ID_2 ID_adm1 ID_adm2)

	* Rename Variables
	renvars transaction_value_loc `2'_disbcount / `2'_ADM2_LOC13 `2'_disbcount_ADM213
	* WorldBank specific:
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars transaction_value_loc_`g' `2'_disbcount_`g' / `2'_ADM2_LOC_`g'13 `2'_disbcount_ADM2_`g'13
		}
	}
	save "`1'_disbursement_ADM2_prec13.dta", replace 


	noisily di "3) Generate location weighted data with precision code 4 (Only ADM1 information) for ADM1 level"
	use "`1'_disbursement.dta", clear
	keep if (precision_N100==4)
	*XXXXXX Melvin 31.01.2018: Rename variable to avoid merger error
	rename Disbursementcount `2'_disbcount
	
	*create local for time period of dataset for later use (create after droping precisioncode 4 to get relevant years to append. Some donors do not have projects for some years with precision code 4 e.g. India)
	levelsof transaction_year, local(T)

	* Allocate aid according to the number of locations and the general share of precison-4-locations per project //
	gen transaction_value_loc=transaction_value_tot/temp_totcoded
	replace `2'_disbcount=`2'_disbcount/temp_totcoded
	* WorldBank specific:
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			gen transaction_value_loc_`g'=transaction_value_tot_`g'/temp_totcoded
			rename Disbursementcount_`g' `2'_disbcount_`g'
			replace `2'_disbcount_`g'=`2'_disbcount_`g'/temp_totcoded
		}
	}
	collapse (sum) transaction_value_loc* `2'_disbcount*, by(transaction_year ID_0 ID_1 ID_adm1)
	renvars transaction_value_loc `2'_disbcount / `2'_ADM1_LOC4 `2'_disbcount_ADM14
	// merge ADM1 names
	merge m:1 ID_adm1 using gadm1_ids.dta, nogen keep(1 3)
	* WorldBank specific: Allocate aid over sectors
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars transaction_value_loc_`g' `2'_disbcount_`g' / `2'_ADM1_LOC_`g'4 `2'_disbcount_ADM1_`g'4
		}
	}
	save "`1'_disbursement_ADM1_prec4.dta", replace 

	noisily di "4) Prepare location weighted data with precision code 4 (ADM2 information)"

	foreach i in `T' {
		use "`1'_disbursement_ADM1_prec4.dta", replace
		keep if transaction_year==`i'
		merge 1:m ID_adm1 using gadm2.dta, nogen keep(1 3)
		save "T_`i'.dta", replace
	}
	* Put yearly disbursements together
	clear //clear datasets first, otherwise create duplicates
	fs T_*.dta // put file names in list
	append using `r(files)' // append all files in list
	// delete single files
	foreach i in `T' {
		erase "T_`i'.dta"
	}


	* Split ADM1 level aid of precision 4 equally across corresponding ADM2 regions
	gen count=1
	bysort ID_adm1 transaction_year: egen totalcount=total(count)
	* allocate aid with prec4 to all ADM2 regions 
	gen `2'_ADM2_LOC4 =`2'_ADM1_LOC4 /totalcount
	gen `2'_disbcount_ADM24=`2'_disbcount_ADM14/totalcount
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			gen `2'_ADM2_LOC_`g'4 =`2'_ADM1_LOC_`g'4  /totalcount
			gen `2'_disbcount_ADM2_`g'4=`2'_disbcount_ADM1_`g'4/totalcount
			drop `2'_ADM1_LOC_`g'4 `2'_disbcount_ADM1_`g'4
		}
	}
	drop `2'_ADM1_LOC4 `2'_disbcount_ADM14
	save "`1'_disbursement_ADM2_prec4.dta", replace 



	noisily di "5) Merge data with precision code 4 and precision codes 1-3 to create location weighted aid on ADM2 level"
	use "`1'_disbursement_ADM2_prec13.dta", clear

	*	XXXXXX Lennart, 05.01.2018: In the following step we lose around 200 million of aid (0.32% of total allocated aid), which could not be attributed to regions preivously as we had no ID_0, which indicates that something with the geo-merge went wrong
	drop if ID_0==0 //No geographic information available for AID projects (Missing Aid information worth about 5bn USD)
		
	* Rename variables with precisioncode 1-3 to prepare them for merge with precisioncode 4
	renvars `2'_ADM2_LOC13 `2'_disbcount_ADM213 / `2'_ADM2_LOC `2'_disbcount_ADM2_LOC
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars `2'_ADM2_LOC_`g'13 `2'_disbcount_ADM2_`g'13 / `2'_ADM2_LOC_`g'  `2'_disbcount_ADM2_LOC_`g' 
		}
	}
	* Add data with precisioncode 4:
	merge 1:1 ID_adm2 transaction_year using "`1'_Disbursement_ADM2_prec4.dta",  nogen
	* Replace missings
	replace `2'_ADM2_LOC=0 if `2'_ADM2_LOC==.
	replace `2'_ADM2_LOC4=0 if `2'_ADM2_LOC4 ==.
* XXXX  Lennart 11.01.2018: In line with Melvin's command below, we replace all missing `2'_disbcounts
	replace `2'_disbcount_ADM24=0 if `2'_disbcount_ADM24==.

	* Add data up
	replace `2'_ADM2_LOC=`2'_ADM2_LOC+`2'_ADM2_LOC4 
	replace `2'_disbcount_ADM2_LOC=`2'_disbcount_ADM2_LOC+`2'_disbcount_ADM24
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{	
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			* Replace missings
			replace `2'_ADM2_LOC_`g'=0 if `2'_ADM2_LOC_`g'==.
			replace `2'_ADM2_LOC_`g'4=0 if `2'_ADM2_LOC_`g'4 ==.
			replace `2'_disbcount_ADM2_`g'4=0 if `2'_disbcount_ADM2_`g'4==.
			replace `2'_disbcount_ADM2_LOC_`g'=0 if `2'_disbcount_ADM2_LOC_`g'==.
*XXXXXX Melvin 06.01.2018: @Lennart, do we not need to set the disbursement count equal to 0 if missing?
* XXXX  Lennart 11.01.2018: @Melvin, you are absolutely right. We should also replace the missings for Disbursementocunts
			* Add data up
			replace `2'_ADM2_LOC_`g'=`2'_ADM2_LOC_`g'+`2'_ADM2_LOC_`g'4
			replace `2'_disbcount_ADM2_LOC_`g'=`2'_disbcount_ADM2_LOC_`g'+`2'_disbcount_ADM2_`g'4
			* Clean
			drop `2'_disbcount_ADM2_`g'4 `2'_ADM2_LOC_`g'4
		}
	}
	drop `2'_disbcount_ADM2*4 `2'_ADM2_LOC*4
	drop d_miss_ADM2 count totalcount total
	* Labeling
	label var `2'_ADM2_LOC "Value of `1' Aid disbursements per ADM2 region(weighted by number of project locations)"
	label var `2'_disbcount_ADM2_LOC " Number of non-negative `1' aid disbursements per region"
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			label var `2'_disbcount_ADM2_LOC_`g' " Number of non-negative `1' aid disbursements per region in sector `g'"
			label var `2'_ADM2_LOC_`g' "Value of `1' Aid per ADM2 region in sector `g' (weighted by # of proj. locations)"
		}	
	}
	save "`1'_disbursement_ADM2.dta", replace
		
		
	noisily di "6) Generate location weighted aid on ADM1 level by collapsing ADM1 level data"
	collapse (sum) `2'_ADM2_LOC* `2'_disbcount_ADM2*, by(transaction_year ID_0 ID_1 ID_adm1)
	// merge ADM1 names
	merge m:1 ID_adm1 using gadm1_ids.dta, nogen keep(1 3)
	* Rename Variables
	renvars `2'_ADM2_LOC `2'_disbcount_ADM2_LOC / `2'_ADM1_LOC `2'_disbcount_ADM1_LOC
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars `2'_ADM2_LOC_`g' `2'_disbcount_ADM2_LOC_`g' / `2'_ADM1_LOC_`g' `2'_disbcount_ADM1_LOC_`g'
		}
	}
	* Labeling
		label var `2'_ADM1_LOC "Value of `1' Aid disbursements per ADM1 region(weighted by number of project locations)"
		label var `2'_disbcount_ADM1_LOC " Number of non-negative `1' aid disbursements per region"
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			label var `2'_disbcount_ADM1_LOC_`g' " Number of non-negative `1' aid disbursements per region in sector `g'"
			label var `2'_ADM1_LOC_`g' "Value of `1' Aid per ADM1 region in sector `g' (weighted by # of proj. locations)"
		}	
	}
	save "`1'_disbursement_ADM1.dta", replace

	noisily di "7) Create balanced dataset without gaps for ADM1 and ADM2" // (assumption perfect data on aid flows, that is, if there is no data, then it is not missing but no aid at all, = 0) 
	//ADM2 level
	use "`1'_disbursement_ADM2.dta", clear
	drop DAC_1995 //drop this variable here to ensure complete merge (only needed for ADM2)
	merge m:1 ID_adm2 using gadm2.dta
	* keep only DAC countries
	* xxx Lennart 15.05.2018: Here we would loose South Sudan, Serbia and Kosovo, which were not a state yet in 1995. This is not our intention and, hence, I appended the command "keep if DAC_1995==1" to
	keep if DAC_1995 ==1 | ADM0=="Serbia" | ADM0=="Kosovo" | ADM0=="South Sudan"
	* attribute first year to regions that never received aid to prepare for tsfill
	sum transaction_year
	replace transaction_year=`r(min)' if _merge==2
	// after replacing the year with minimum year we can drop _merge
	drop _merge
	// replace missing ADM2 names if the ADM2 region is missing with ADM1 name and identifier
	replace ADM2=ADM1 if ADM2==""
	
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm2 transaction_year
	egen ID_adm2_num = group(ID_adm2)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm2_num transaction_year
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm2_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995 d_miss_ADM2, replace 
	bysort ID_adm2_num (years_reverse): carryforward ID_adm* ID_adm2 ADM* ISO3 ID_* DAC_1995 d_miss_ADM2, replace
	//Melvin H.L. Wong: 5. replace Aidvvar= 0 if Aidvar==.
	replace `2'_ADM2_LOC = 0 if `2'_ADM2_LOC ==.
	replace `2'_disbcount_ADM2_LOC = 0 if `2'_disbcount_ADM2_LOC ==.
	//Lennart 08.03.2018: 6. replace Aidvvar= 0 if Aidvar<0
			replace `2'_ADM2_LOC = 0 if `2'_ADM2_LOC <0
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{		
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			replace `2'_ADM2_LOC_`g' = 0 if `2'_ADM2_LOC_`g' ==.
				//Lennart 08.03.2018: replace Aidvvar= 0 if Aidvar<0
			replace `2'_ADM2_LOC_`g' = 0 if `2'_ADM2_LOC_`g' <0
			replace `2'_disbcount_ADM2_LOC_`g' = 0 if `2'_disbcount_ADM2_LOC_`g'==.
		}
	}
	* drop disbursement count for China and India, as onlya committment data
		if "`1'"=="China" | "`1'"=="India" {	
		drop *_disbcount*
	}
	drop years_reverse
	order transaction_year ID_adm*
	sort ID_adm* transaction_year
	*XXXXXX Melvin 06.01.2018: Manually checked if the sum of all allocated aid equals the sum of project aid. It does.
	save "`1'_disbursement_ADM2_tsfill.dta", replace
		
	//ADM1 level
	use "`1'_disbursement_ADM1.dta", clear
	merge m:1 ID_adm1 using gadm1.dta
	* keep only DAC countries
		* xxx Lennart 15.05.2018: Here we would loose South Sudan, Serbia and Kosovo, which were not a state yet in 1995. This is not our intention and, hence, I appended the command "keep if DAC_1995==1" to
	keep if DAC_1995 ==1 | ADM0=="Serbia" | ADM0=="Kosovo" | ADM0=="South Sudan"
	* attribute first year to regions that never received aid to prepare for tsfill
	sum transaction_year
	replace transaction_year=`r(min)' if _merge==2
	// after replacing the year with minimum year we can drop _merge
	drop _merge
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm1 transaction_year
	egen ID_adm1_num = group(ID_adm1)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm1_num transaction_year
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm1_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995, replace 
	bysort ID_adm1_num (years_reverse): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995, replace
	//Melvin H.L. Wong: 5. replace Aidvvar= 0 if Aidvar==.
	replace `2'_ADM1_LOC = 0 if `2'_ADM1_LOC ==.
	replace `2'_disbcount_ADM1_LOC = 0 if `2'_disbcount_ADM1_LOC ==.
	//Lennart 08.03.2018: 6. replace Aidvvar= 0 if Aidvar<0
	replace `2'_ADM1_LOC = 0 if `2'_ADM1_LOC <0
	* WorldBank specific: Allocate aid over sectors
	if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{	
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			replace `2'_ADM1_LOC_`g' = 0 if `2'_ADM1_LOC_`g' ==.
							//Lennart 08.03.2018: replace Aidvvar= 0 if Aidvar<0
			replace `2'_ADM1_LOC_`g' = 0 if `2'_ADM1_LOC_`g' <0
			replace `2'_disbcount_ADM1_LOC_`g' = 0 if `2'_disbcount_ADM1_LOC_`g'==.
		}
	}
	* drop disbursement count for China and India, as onlya committment data
		if "`1'"=="China" | "`1'"=="India" {	
		drop *_disbcount*
	}
	drop years_reverse
	order transaction_year ID_adm*
	sort ID_adm* transaction_year
	save "`1'_disbursement_ADM1_tsfill.dta", replace
	
	noisily di "8) Clean Up and delete redundant files"
	erase "`1'_disbursement_ADM1.dta"
	erase "`1'_disbursement_ADM2.dta"
	erase "`1'_disbursement_ADM2_prec13.dta"
	erase "`1'_disbursement_ADM2_prec4.dta"
	erase "`1'_disbursement_ADM1_prec4.dta"
	
	noisily di "DONE!"
	}
end
********************************************************************************

********************************************************************************
*** Define program for aid allocation using population weights
********************************************************************************

capture program drop allocAid_pop
program define allocAid_pop
**********************************************************************	
//3) Prepare population weighted data
**********************************************************************
	qui{
		***HELP FILE***
		/*
		First argument is reserved for name of donor
		Second argument is reserved for variable name of aid variable
		
		Required datasets:
			- gadm2.dta
			- 1_1_1_R_pop_GADM2.dta
			- 1_1_1_R_pop_GADM1.dta
			- `1'_disbursement.dta
		*/

		** Get stata package
		capture ssc install fs, replace
		**********************************************************************	
		//3) Prepare population weighted data
		*************************************************************************
		noisily di "1) Open `1' aid dataset"

		/*XXXXXXXXX Melvin 16.01.2018: Recode missing ADM2 regions as ADM1 */
		use gadm2.dta, clear
		* Merge with Population data
		rename ID_adm2 rid2
		merge 1:m rid2 using "$data\ADM\1_1_1_R_pop_GADM2.dta", nogen
		*XXXXXX Melvin 07.03.2018: Replace pop=0 with 1 to avoid losing aid amounts
		replace isum_pop=1 if isum_pop==0
		
		renvars year rid2 isum_pop / transaction_year ID_adm2 isum_pop_ADM2
		drop id_0 id_1 id_2 name_1 name_2 rid2_num country

		* prepare population merge by ID_adm1 year: Expand regions by year if no adm2 region
		expand 26 if transaction_year==.
		bysort ID_adm1: replace transaction_year=1989+_n if d_miss_ADM2==1

		*XXXXXX Melvin 02.01.2018: Treat missing ADM2 regions as ADM1 region: Merge ADM1 pop if ADM2 is missing
		renvars transaction_year ID_adm1 / year rid1
		merge m:1 rid1 year using "$data\ADM\1_1_1_R_pop_GADM1.dta", keep(3) nogen keepusing(isum_pop)
		*XXXXXX Melvin 07.03.2018: Replace pop=0 with 1 to avoid losing aid amounts
		replace isum_pop=1 if isum_pop==0
		
		replace isum_pop_ADM2=isum_pop if d_miss_ADM2==1
		rename isum_pop isum_pop_ADM1
		renvars year rid1 / transaction_year ID_adm1
		
		*XXXXXX Melvin 16.01.2018: merge population data with aid data
		merge 1:m ID_adm2 transaction_year using "`1'_disbursement.dta"
		*XXXXXX Melvin 31.01.2018: Rename variable to avoid merger error
		rename Disbursementcount `2'_disbcount
		*XXXXXX Melvin 16.01.2018: get ADM1 population if there is no ADM2 region as identifier because of precision code 4
		bysort ID_adm1 transaction_year: egen temp_pop=mean(isum_pop_ADM1) //get ADM1 pop in additional column
		replace isum_pop_ADM1=temp_pop if isum_pop_ADM1==. & precision_N100==4 // add missing ADM1 pop data for precisioncode 4 projects (note: "& precision_N100==4" in code is redundant but illustrates that population data is missing for those region with precision code 4)

		/* TEST HOW MUCH AID IS LOST
		duplicates drop project_id transaction_year, force
		egen lost_aid=total(transaction_value_tot) if isum_pop_ADM1==.
		egen all_aid=total(transaction_value_tot)
		sum lost_aid all_aid
		// lost aid is 2,76 bn USD (2760 million USD) almost all of them due to imprecise geocode ("water points")
		*/



		/*
		 XXXXX Lennart 10.01.2018 Generate Population on ADM1 level to allocated aid, which was coded with precision 4 based on the approach written down above:
		 Pop weighted
		- Each of the 3 ADM2 regions coded with precision 1-3 gets Pop(i)/(Sum Pop)*4/5*X 
		- The ADM1 region coded with precision 4 gets Pop(i)/(Sum Pop)*4/5*X and this is then distributed equally among the corresponding ADM2 regions
		*/

		* XXXXX Lennart 10.01.2018 Create population for weighting of precision 4(ADM1-population) and precision3 (ADM2-population) locations
		gen wpop4=isum_pop_ADM1 if precision_N100==4
		gen wpop13=isum_pop_ADM2 if precision_N100<4
		* XXXXX Lennart 10.01.2018 Create denominator of population weights
		gen wpop=isum_pop_ADM1 if precision_N100==4
		replace wpop=isum_pop_ADM2 if precision_N100<4



		//temp_totcoded: Population of locations that are precisely coded (higher than precision level 4)
		egen pop_totcoded=total(wpop), by(project_id transaction_year)

		//temp_totcoded: Population of locations that are coded with precision 4
		egen pop_totcoded4=total(wpop4) , by(project_id transaction_year)

		//temp_totcoded: Population of locations that are precisely coded (higher than precision level 4)
		egen pop_totcoded13=total(wpop13) , by(project_id transaction_year)


		* XXXXX Lennart 10.01.2018 - Save in order to continue with allocation of precision4 and precision 1-3 aid
		save "`1'_disbursement_popweights.dta", replace

		noisily di "2) Allocate aid according to the number of locations and the general share of precison-123-locations per project for ADM2 level"
		**** Generate population weighted data with precision codes 1-3 (ADM2 information) for ADM2 level**** 
		keep if precision_N100<4
		* XXXXXXXX Lennart 10.01.2018: Allocate aid according to the population of precision-123-locations in total population of project regions //
		* For instance 100 [transaction_value_tot] * 10 mio (number of population in this specific region) [wpop] / 100 (number of population in project-regions with precision1-3) [pop_totcoded]
		gen transaction_value_pop=transaction_value_tot*(wpop/pop_totcoded)
		* XXXXX Lennart 10.01.2018: Accordingly, we also apply this weighting scheme to the disbursementcounts. @ Kai & Melvin: Do you find this sensible? Or should we apply here something like number of active projects per each region.
		replace `2'_disbcount=`2'_disbcount*(wpop/pop_totcoded)
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
		* XXXXX Lennart 10.01.2018: Repeat excercise for sectoral disbursements
			foreach g in AX BX CX EX FX JX LX TX WX YX{
			gen transaction_value_pop_`g'=transaction_value_tot_`g'*(wpop/pop_totcoded)
			rename Disbursementcount_`g' `2'_disbcount_`g'
			replace `2'_disbcount_`g'=`2'_disbcount_`g'*(wpop/pop_totcoded)
			}
		}

		* Collapse on ADM2 level to aggregate all project disbursements per ADM2 region
		collapse (sum) transaction_value_pop* `2'_disbcount*, by(transaction_year ID_0 ID_1 ID_2 ID_adm1 ID_adm2)

		* Rename Variables
		renvars transaction_value_pop `2'_disbcount / `2'_ADM2_POP13 `2'_disbcount_ADM2_POP13
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars transaction_value_pop_`g' `2'_disbcount_`g' / `2'_ADM2_POP_`g'13 `2'_disbcount_ADM2_POP_`g'13
			}
		}
		save "`1'_disbursement_ADM2_POP_prec13.dta", replace 

		noisily di "3) Generate location weighted data with precision code 4 (Only ADM1 information) for ADM1 level"
		**** Generate population weighted data with precision code 4 (Only ADM1 information) for ADM1 level**** 
		use "`1'_disbursement_popweights.dta", clear
		keep if (precision_N100==4)

		*create local for time period of dataset for later use (create after droping precisioncode 4 to get relevant years to append. Some donors do not have projects for some years with precision code 4 e.g. India)
		levelsof transaction_year, local(T)
		* XXXXXXXX Lennart 10.01.2018: Allocate aid according to the population of precision-123-locations in total population of project regions //
		* For instance 100 [transaction_value_tot] * 10 mio (number of population in this specific region) [wpop] / 100 (number of population in project-regions with precision1-3) [pop_totcoded]
		gen transaction_value_pop=transaction_value_tot*(wpop/pop_totcoded)
		* XXXXX Lennart 10.01.2018: Accordingly, we also apply this weighting scheme to the `2'_disbcounts. @ Kai & Melvin: Do you find this sensible? Or should we apply here something like number of active projects per each region.
		replace `2'_disbcount=`2'_disbcount*(wpop/pop_totcoded)
		* XXXXXXX Lennart 10.01.2018: Redo Excercise for sectoral disbursements
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
			gen transaction_value_pop_`g'=transaction_value_tot_`g'*(wpop/pop_totcoded)
			rename Disbursementcount_`g' `2'_disbcount_`g'
			replace `2'_disbcount_`g'=`2'_disbcount_`g'*(wpop/pop_totcoded)
			}
		}
		collapse (sum) transaction_value_pop* `2'_disbcount*, by(transaction_year ID_0 ID_1 ID_adm1)

		* Rename Variables
		renvars transaction_value_pop `2'_disbcount / `2'_ADM1_POP4 `2'_disbcount_ADM1_POP4
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars transaction_value_pop_`g' `2'_disbcount_`g' / `2'_ADM1_POP_`g'4 `2'_disbcount_ADM1_POP_`g'4
			}
		}
		* Save Dataset
		save "`1'_disbursement_ADM1_POP_prec4.dta", replace
		
		noisily di "4) Prepare location weighted data with precision code 4 (ADM2 information)"
		* XXXXX Lennart 11.01.2018: Due to the potential for duplicates and erroneous merges, we go with classical merge-command instead of joinby on a yearly level
		foreach i in `T' {
			use "`1'_disbursement_ADM1_POP_prec4.dta", replace
			keep if transaction_year==`i'
			merge 1:m ID_adm1 using gadm2.dta, nogen keep(1 3)
			save "T_`i'.dta", replace
		}
		* Put yearly disbursements together
		clear //clear datasets first, otherwise create duplicates
		fs T_*.dta // put file names in list
		append using `r(files)' // append all files in list
		// delete single files
		foreach i in `T' {
			erase "T_`i'.dta"
		}

		* XXXX Lennart 11.01.2018: Merge with population data in order to allow for population weighted distribution of data
		renvars transaction_year ID_adm2 / year rid2
		merge m:1 rid2 year using "$data\ADM\1_1_1_R_pop_GADM2.dta", nogen keep(1 3)
		renvars year rid2 isum_pop / transaction_year ID_adm2 isum_pop_ADM2
		* XXXX Lennart 11.01.2018: As above, some projects might have no population data attributed. In order to not "loose" this aid, we could replace the data with 1 person per region.
		replace isum_pop_ADM2=1 if isum_pop_ADM2==. | isum_pop_ADM2==0

		* XXXX Lennart 11.01.2018: Split ADM1 level aid of precision 4 by population across corresponding ADM2 regions
		bysort transaction_year ID_adm1: egen isum_pop_ADM1=total(isum_pop_ADM2)

		* allocate aid with prec4 to all ADM2 regions 
		gen `2'_ADM2_POP4 =`2'_ADM1_POP4 *isum_pop_ADM2/isum_pop_ADM1


		* XXXXXX Lennart 11.01.2018: Analoguous to the location-weighted aid, disbursement / transaction count is split across locations with population weights.
		gen `2'_disbcount_ADM2_POP4=`2'_disbcount_ADM1_POP4*isum_pop_ADM2/isum_pop_ADM1
		*XXXX Lennart 11.01.2018: Redo excercise for sectoral aid
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China" {
		foreach g in AX BX CX EX FX JX LX TX WX YX{
			gen `2'_ADM2_POP_`g'4 =`2'_ADM1_POP_`g'4  *isum_pop_ADM2/isum_pop_ADM1
			gen `2'_disbcount_ADM2_POP_`g'4=`2'_disbcount_ADM1_POP_`g'4*isum_pop_ADM2/isum_pop_ADM1
			drop `2'_ADM1_POP_`g'4 `2'_disbcount_ADM1_POP_`g'4
			}
		}
		drop `2'_ADM1_POP4 `2'_disbcount_ADM1_POP4
		* XXX Lennart 11.01.2018: Save File
		save "`1'_Disbursement_ADM2_POP_prec4.dta", replace 


		noisily di "5) Merge data with precision code 4 and precision codes 1-3 to create location weighted aid on ADM2 level"
		* XXXXXXXXXX Lennart 11.01.2018: Merge data with precision code 4 and precision codes 1-3 to create population weighted aid on ADM2 level
		use "`1'_disbursement_ADM2_POP_prec13.dta", clear

		*	XXXXXX Lennart, 11.01.2018: In the following step we drop regions for which no geographic information is available (geo-merge in GIS failed). However, this is not linked to a loss in disbursements as
		*  these regions had no aid allocated in any case as we could also not attribute population. In any case, we might want to think about fixing this, because technically these would be recipient regions.
		drop if ID_0==0 //No geographic information available for AID projects (Missing Aid information worth about 5bn USD)
			
			
		* XXXXX Lennart 11.01.2018: Rename variables with precisioncode 1-3 to prepare them for merge with precisioncode 4
		renvars `2'_ADM2_POP13 `2'_disbcount_ADM2_POP13 / `2'_ADM2_POP `2'_disbcount_ADM2_POP
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
				renvars `2'_ADM2_POP_`g'13 `2'_disbcount_ADM2_POP_`g'13 / `2'_ADM2_POP_`g'  `2'_disbcount_ADM2_POP_`g' 
			}
		}
		* Add data with precisioncode 4:
		merge 1:1 ID_adm2 transaction_year using "`1'_Disbursement_ADM2_POP_prec4.dta",  nogen
		* Replace missings
		replace `2'_ADM2_POP=0 if `2'_ADM2_POP==.
		replace `2'_ADM2_POP4=0 if `2'_ADM2_POP4 ==.
		*XXXXXX Melvin 16.01.2018: Added the following line
		replace `2'_disbcount_ADM2_POP4=0 if `2'_disbcount_ADM2_POP4==.
		* Add data up
		replace `2'_ADM2_POP=`2'_ADM2_POP+`2'_ADM2_POP4 
		replace `2'_disbcount_ADM2_POP=`2'_disbcount_ADM2_POP+`2'_disbcount_ADM2_POP4
		if "`i'"=="IDA" | "`i'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
				* Replace missings
				replace `2'_ADM2_POP_`g'=0 if `2'_ADM2_POP_`g'==.
				replace `2'_ADM2_POP_`g'4=0 if `2'_ADM2_POP_`g'4 ==.
			*XXXXXX Melvin 16.01.2018: Added the following two lines
				replace `2'_disbcount_ADM2_POP_`g'4=0 if `2'_disbcount_ADM2_POP_`g'4==.
				replace `2'_disbcount_ADM2_POP_`g'=0 if `2'_disbcount_ADM2_POP_`g'==.
				* Add data up
				replace `2'_ADM2_POP_`g'=`2'_ADM2_POP_`g'+`2'_ADM2_POP_`g'4
				replace `2'_disbcount_ADM2_POP_`g'=`2'_disbcount_ADM2_POP_`g'+`2'_disbcount_ADM2_POP_`g'4
				* Clean
				drop `2'_disbcount_ADM2_POP_`g'4 `2'_ADM2_POP_`g'4
			}
		}
		drop `2'_disbcount_ADM2*4 `2'_ADM2_POP*4
		drop d_miss_ADM2
		// merge ADM2 names
		merge m:1 ID_adm2 using gadm2_ids.dta, nogen keep(1 3)
		* Labeling
		label var `2'_ADM2_POP "Value of WB `1' Aid disbursements per ADM2 region(weighted by population of project locations)"
		label var `2'_disbcount_ADM2_POP " Number of non-negative `1' aid disbursements per region"
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
				label var `2'_disbcount_ADM2_POP_`g' "Number of non-negative `1' aid disbursements per region in sector `g'"
				label var `2'_ADM2_POP_`g' "Value of WB `1' Aid per ADM2 region in sector `g' (weighted by pop of locations)"
			}	
		}
		save "`1'_disbursement_ADM2_POP.dta", replace
			
		noisily di "6) Generate location weighted aid on ADM1 level by collapsing ADM1 level data"
		* XXXXXXX Lennart 11.01.2018: Generate population weighted aid on ADM1 level by collapsing ADM2 level data
		collapse (sum) `2'_ADM2_POP* `2'_disbcount_ADM2*, by(transaction_year ID_0 ID_1 ID_adm1)
		// merge ADM1 names
		merge m:1 ID_adm1 using gadm1_ids.dta, nogen keep(1 3)
		* Rename Variables
		renvars `2'_ADM2_POP `2'_disbcount_ADM2_POP / `2'_ADM1_POP `2'_disbcount_ADM1_POP
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
			renvars `2'_ADM2_POP_`g' `2'_disbcount_ADM2_POP_`g' / `2'_ADM1_POP_`g' `2'_disbcount_ADM1_POP_`g'
			}
		}
		* Labeling
			label var `2'_ADM1_POP "Value of WB `1' Aid disbursements per ADM1 region(weighted by population of project locations)"
			label var `2'_disbcount_ADM1_POP " Number of non-negative `1' aid disbursements per region"
		if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
			foreach g in AX BX CX EX FX JX LX TX WX YX{
				label var `2'_disbcount_ADM1_POP_`g' " Number of non-negative `1' aid disbursements per region in sector `g'"
				label var `2'_ADM1_POP_`g' "Value of WB `1' Aid per ADM1 region in sector `g' (weighted by pop of  locations)"
			}	
		}
		save "`1'_disbursement_ADM1_POP.dta", replace


		noisily di "7) Create balanced dataset without gaps for ADM1 and ADM2" // (assumption perfect data on aid flows, that is, if there is no data, then it is not missing but no aid at all, = 0) 
			****create balanced dataset without gaps (assumption perfect data on aid flows, that is, if there is no data, then it is not missing but no aid at all, = 0) 
			//ADM2 level
			use "`1'_disbursement_ADM2_POP.dta", clear
			drop DAC_1995 //drop this variable here to ensure complete merge (only needed for ADM2)
			merge m:1 ID_adm2 using gadm2.dta
			* keep only DAC countries
			* xxx Lennart 15.05.2018: Here we would loose South Sudan, Serbia and Kosovo, which were not a state yet in 1995. This is not our intention and, hence, I appended the command "keep if DAC_1995==1" to
			keep if DAC_1995 ==1 | ADM0=="Serbia" | ADM0=="Kosovo" | ADM0=="South Sudan"
			* attribute first year to regions that never received aid to prepare for tsfill
			sum transaction_year
			replace transaction_year=`r(min)' if _merge==2
			// after replacing the year with minimum year we can drop _merge
			drop _merge
			// replace missing ADM2 names if the ADM2 region is missing with ADM1 name and identifier
			replace ADM2=ADM1 if ADM2==""
			
			//Melvin H.L. Wong: 1. sort variables
			sort ID_adm2 transaction_year
			egen ID_adm2_num = group(ID_adm2)
			//Melvin H.L. Wong: 2. tsset Geounit Jahr
			tsset ID_adm2_num transaction_year
			//Melvin H.L. Wong: 3. tsfill, full
			tsfill, full //fill out data gaps
			gen years_reverse =-transaction_year
			//Melvin H.L. Wong: 4. carryforward, countryname etc
			bysort ID_adm2_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995 d_miss_ADM2, replace 
			bysort ID_adm2_num (years_reverse): carryforward ID_adm* ID_adm2 ADM* ISO3 ID_* DAC_1995 d_miss_ADM2, replace
			//Melvin H.L. Wong: 5. replace Aidvvar= 0 if Aidvar==.
			replace `2'_ADM2_POP = 0 if `2'_ADM2_POP ==.
			replace `2'_disbcount_ADM2_POP = 0 if `2'_disbcount_ADM2_POP ==.
			//Lennart 08.03.2018: 6. replace Aidvvar= 0 if Aidvar<0
			replace `2'_ADM2_POP = 0 if `2'_ADM2_POP <0

			if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
				foreach g in AX BX CX EX FX JX LX TX WX YX{
					replace `2'_ADM2_POP_`g' = 0 if `2'_ADM2_POP_`g' ==.
					//Lennart 08.03.2018: replace Aidvvar= 0 if Aidvar<0
					replace `2'_ADM2_POP_`g' = 0 if `2'_ADM2_POP_`g' <0
					replace `2'_disbcount_ADM2_POP_`g' = 0 if `2'_disbcount_ADM2_POP_`g'==.
				}
			}
			* drop disbursement count for China and India, as onlya committment data
				if "`1'"=="China" | "`1'"=="India" {	
				drop *_disbcount*
			}
			drop years_reverse isum_* country id_0 id_1 id_2 name_1 name_2 rid2_num
			order transaction_year ID_adm*
			sort ID_adm* transaction_year
			save "`1'_disbursement_ADM2_POP_tsfill.dta", replace
			
			//ADM1 level

			use "`1'_disbursement_ADM1_POP.dta", clear
			merge m:1 ID_adm1 using gadm1.dta
			* keep only DAC countries
			* xxx Lennart 15.05.2018: Here we would loose South Sudan, Serbia and Kosovo, which were not a state yet in 1995. This is not our intention and, hence, I appended the command "keep if DAC_1995==1" to
			keep if DAC_1995 ==1 | ADM0=="Serbia" | ADM0=="Kosovo" | ADM0=="South Sudan"
			* attribute first year to regions that never received aid to prepare for tsfill
			sum transaction_year
			replace transaction_year=`r(min)' if _merge==2
			// after replacing the year with minimum year we can drop _merge
			drop _merge
			
		
		
			//Melvin H.L. Wong: 1. sort variables
			sort ID_adm1 transaction_year
			egen ID_adm1_num = group(ID_adm1)
			//Melvin H.L. Wong: 2. tsset Geounit Jahr
			tsset ID_adm1_num transaction_year
			//Melvin H.L. Wong: 3. tsfill, full
			tsfill, full //fill out data gaps
			gen years_reverse =-transaction_year
			//Melvin H.L. Wong: 4. carryforward, countryname etc
			bysort ID_adm1_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995, replace 
			bysort ID_adm1_num (years_reverse): carryforward ID_adm* ADM* ISO3 ID_* DAC_1995, replace
			//Melvin H.L. Wong: 5. replace Aidvvar= 0 if Aidvar==.
			replace `2'_ADM1_POP = 0 if `2'_ADM1_POP ==.
			//Lennart 08.03.2018: replace Aidvvar= 0 if Aidvar<0
			replace `2'_ADM1_POP = 0 if `2'_ADM1_POP <0
			replace `2'_disbcount_ADM1_POP = 0 if `2'_disbcount_ADM1_POP ==.
			if "`1'"=="IDA" | "`1'"=="IBRD" | "`1'"=="China"{
				foreach g in AX BX CX EX FX JX LX TX WX YX{
					replace `2'_ADM1_POP_`g' = 0 if `2'_ADM1_POP_`g' ==.
							//Lennart 08.03.2018: replace Aidvvar= 0 if Aidvar<0
					replace `2'_ADM1_POP_`g' = 0 if `2'_ADM1_POP_`g' <0
					replace `2'_disbcount_ADM1_POP_`g' = 0 if `2'_disbcount_ADM1_POP_`g'==.
				}
			}
			* drop disbursement count for China and India, as onlya committment data WBAID_ADM1_POP_LX
				if "`1'"=="China" | "`1'"=="India" {	
				drop *_disbcount*
			}
			drop years_reverse ID_adm1_num
			order transaction_year ID_adm*
			sort ID_adm* transaction_year
			save "`1'_disbursement_ADM1_POP_tsfill.dta", replace
		/* XXXXXXXX Lennart 11.01.2018: We have 100 million more total aid (0.15% of aid)  with population than with location weights. We should at least double check
		 use "`1'_disbursement_ADM1_POP_tsfill.dta", clear
		 egen total=total(`2'_ADM1_POP)
		 sum total
		 use "`1'_disbursement_ADM1_tsfill.dta", clear
		  egen total=total(`2'_ADM1_LOC)
		 sum total
		 */


			noisily di "8) Clean Up and delete redundant files"
			erase "`1'_disbursement_ADM1_POP.dta"
			erase "`1'_disbursement_ADM2_POP.dta"
			erase "`1'_Disbursement_ADM2_POP_prec4.dta"
			erase "`1'_Disbursement_ADM2_POP_prec13.dta"
			erase "`1'_Disbursement_ADM1_POP_prec4.dta"
			erase "`1'_disbursement_popweights.dta"
			
			noisily di "DONE!"
	}
end
********************************************************************************
**************************
**************************
* 0: General regional Data *
**************************
**************************

*** DAC Country List of 1995 to define sample ***
import excel using "$rawdata\Country Classification\DAC_1995.xlsx", firstrow clear
rename Economy ADM0
duplicates drop ADM0, force
gen DAC_1995=1
replace ADM0="Bahamas" if ADM0=="Bahamas "
replace ADM="Kuwait" if ADM0=="Kuwait "
* drop an empty variable from the excel file
drop B
label var DAC_1995 "Dummy variable if country was on the DAC recipient list in 1995"
save "DAC.dta", replace
* Prepare GADM2 regions data in order to be able to attribute Precision Code 4 (ADM1 data) to subregions

/*XXXXXXXXX Melvin 08.11.2017: Correct error in CSV file. Contained empty spaces 
which lead to incorrect import of CSV data*/
* Load GADM Data
import delim using "$data\ADM\gadm28.csv", clear 
keep objectidn100 isoc3 id_0n100 name_0c75 id_1n100 name_1c75 name_2c75 id_2n100
renvars objectidn100 isoc3  name_0c75 name_1c75 name_2c75  / OBJECTID ISO3 ADM0 ADM1 ADM2 
* Generate unique identifier for each ADM region:
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0n100 r id_1n100)
egen ID_adm2 = concat(c id_0n100 r id_1n100 r id_2n100)
drop c r OBJECTID
rename id_0n100 ID_0N100
rename id_1n100 ID_1N100
rename id_2n100 ID_2N100
label var ID_adm2 "Unique identifier for ADM2 region"

/*XXXXXXXXX Melvin 08.11.2017: Recode missing ADM2 regions as ADM1 */
gen byte d_miss_ADM2=(ADM2=="")
replace ADM2=ADM1 if d_miss_ADM2==1

/*XXXXXXXXX Melvin 08.11.2017: After correction of CSV file don't need this section anymore
duplicates drop OBJECTID, force
* 7 Regions are coded wrongly and are dropped
drop if ISO3==""
*/
* Unify country names
replace ADM0="Côte d'Ivoire" if ADM0=="C??te d'Ivoire"
replace ADM0="St Helena" if ADM0=="Saint Helena"
replace ADM0="Kazakstan" if ADM0=="Kazakhstan"
replace ADM0="Korea, Democratic Republic of" if ADM0=="North Korea"
replace ADM0="Korea, Rep. of" if ADM0=="South Korea"
replace ADM0="Kyrgyz Rep." if ADM0=="Kyrgyzstan"
replace ADM0="Palestinian Administered Areas" if ADM0=="Palestina"
replace ADM0="Sao Tome and Principe" if ADM0=="S?úo Tom?® and Pr?¡ncipe"
replace ADM0="Sao Tome and Principe" if ADM0=="S?ï¿½o Tom?ï¿½ and Pr?ï¿½ncipe"
replace ADM0="Congo, Dem. Rep." if ADM0=="Democratic Republic of the Congo" 
replace ADM0="Congo, Rep." if ADM0=="Republic of Congo"
replace ADM0="East Timor" if ADM0=="Timor-Leste"
replace ADM0="Viet Nam" if ADM0=="Vietnam"
replace ADM0="Chinese Taipei" if ADM0=="Taiwan"
replace ADM0="Saint Barthelemy" if ADM0=="Saint-Barth?ï¿½lemy"
replace ADM0="Aland" if ADM0=="?ï¿½land"
replace ADM0="St Vincent & Grenadines" if ADM0=="Saint Vincent and the Grenadines"
replace ADM0="St Lucia" if ADM0=="Saint Lucia"

merge m:1 ADM0 using DAC.dta
/* The following countries are dropped from the DAC list
- Netherlands Antilles
- Northern Marianas
- Palau Islands
- Virgin Islands (UK)
- Yugoslavia, Federal Republic
*/
drop if _merge==2
drop _merge
replace DAC_1995=0 if DAC_1995==.
preserve
keep ID_0N100 ISO3 ADM0 ID_1N100 ADM1 ID_adm1 DAC_1995 //keep only ADM1 information in preserve mode to avoid mismatches later on
duplicates drop ID_adm1 ADM0 ADM1, force
save gadm1.dta, replace
restore

* drop mulitple entries due to multiple polygons for same region. ok to drop here, since we are interested in the region's existence and not single polygons comprising them.
duplicates drop ID_adm2 ID_adm1 ADM0 ADM1 ADM2, force
save gadm2.dta, replace

/* XXXXXXXXX Melvin 18.01.2018: In correspondence with Lennart:
Create file containing unique ID_adm1 AND ADM1 identifiers. Some ADM1 regions are encoded
differently which causes duplicates when collpsing by ADM1
e.g. ID_adm1=="c250r59"
*/
//generate dataset with ADM1 names
use gadm2.dta, clear
duplicates drop ID_adm1 ISO3 ADM0 ADM1, force
keep ID_adm1 ISO3 ADM0 ADM1
save gadm1_ids.dta, replace
//generate dataset with ADM2 names
use gadm2.dta, clear
duplicates drop ID_adm2 ISO3 ADM0 ADM1 ADM2, force
keep ID_adm2 ISO3 ADM0 ADM1 ADM2
save gadm2_ids.dta, replace

* Create yearly population totals
*XXXXXXXXX Melvin 30.01.2017: Population data has been updated. Major errors corrected for GAMD1 and GADM2
*XXXXXXXXX Melvin 30.01.2017: GREG population data is outstanding and has to be computed
use "$data\ADM\1_1_1_R_pop_GADM1.dta", clear
rename country isoc3
rename isum_pop isum_pop_ADM1

collapse (sum) isum_pop_ADM1, by(isoc3 year)
renvars isum_pop year / c_pop transaction_year
label var c_pop "Total Country Population"
save country_pop, replace

/*
Source of population data. Authors' own calculation based on GPW4 data
Center for International Earth Science Information Network - CIESIN - Columbia University. 2016. Gridded Population of the World, Version 4 (GPWv4): Population Count Adjusted to Match 2015 Revision of UN WPP Country Totals. Palisades, NY: NASA Socioeconomic Data and Applications Center (SEDAC). http://dx.doi.org/10.7927/H4SF2T42. Accessed 01.Jan 2017.
*/


**************************
**************************
* A: Create IDA Aid Data *
**************************
**************************
***Prepare aid data***
import delimited using "$data\Aid\projects_ancillary.csv", clear delimiter(",") //contains sector information
* Drop duplicates as these relate only to IEG Evaluations, which we do not consider here
*XXXXXX Melvin 29.12.2017: Checked the duplicates. Ok to use duplicates drop
duplicates drop projectid, force
save ancillary.dta, replace
** Import matches from AidData-GADM spatial join (Needs to be import excel as important information are lost, if delimited (.csv) is used.) 
import excel using "$data\Aid\alg.xls", firstrow clear
rename project_idC254 projectid
merge m:1 projectid using ancillary.dta, nogen keep(1 3) //no mismatch from master (melvin 29.12.2017)
*XXXXXX Melvin 29.12.2017: @Lennart. Is this comment still relevant?
* XXXXXX Lennart 03.01.2018: @ Melvin: Moved it upwards as it is not needed here anymore.
keep mjsector* sector*pct projectid project_loC254 precision_N100 geoname_idN100 latitudeN1911 longitudeN1911 location_tC254 location_1C254 ISOC3 NAME_0C75  NAME_1C75  NAME_2C75 ID_0N100 ID_1N100 ID_2N100
*XXXXXX Melvin 29.12.2017: destring is not needed anymore
*destring, dpcomma replace
rename projectid project_id
rename latitudeN1911 latitutde
rename longitudeN1911 longitude
rename NAME_0C75 ADM0
rename NAME_1C75 ADM1
rename NAME_2C75 ADM2
rename ISOC3 ISO3
/*
The spatial join in ArcGIS does not consider if the aid loaction points match the precision
of the map provided. That is, If there are aid points with precision codes only valid for ADM1 but 
an ADM2 map is given, ArcGIS assumes that the point belongs to the underlying ADM2 region,
despite the arbitrary point setting, most likely the cetroid of the ADM1 region.

Thus, if the precision codes are only valid for ADM1, we must delete the existing
ADM2 IDs to avoid erroreneous merges later.
*/
replace ADM2="" if precision_N100>=4
replace ID_2N100=. if precision_N100>=4

*create unique region ids
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c ID_0N100 r ID_1N100)
egen ID_adm2 = concat(c ID_0N100 r ID_1N100 r ID_2N100)
/*XXXXXX Melvin 29.12.2017: Note that 512 out of 61440 obs are unmatched with countries and lost
about 250 have precision codes higher equal 4. 
Small measurement error but may be solved using nearest region algorithm.
Stata code: tab precision_N100 if ID_0N100==0
*/
drop c r
sort project_id
save "alg.dta", replace

********************************************************************************
//1) Create yearly disbursements data discounted by information loss
********************************************************************************
/* 
(only until 2012 as we do not have disbursement data in subsequent years)
This dataset will contain the variable transaction_value_tot
which the total project aid for each year, discounted by the informtion loss due to
imprecise geo-codes.
*/
forvalues i=1995(1)2012 {
import excel "$data\Aid\IDA_IBRD_transactions.xlsx", firstrow clear
/*XXXXXX Melvin 05.01.2018 @Kai disbursements are sometimes negative. Should we drop them to avoid miscounting locations?
I checked the WorldBank transaction file for Project P000603

Jun, 2009	IDAN0310	Fees			37.788,94
Jun, 2009	IDAN0310	Repayment		78.302,85
Sep, 2009	IDAN0310	Disbursement	-92.437,99
Okt, 2009	IDAN0310	Disbursement	-39.091,19
Nov, 2009	IDAN0310	Fees			29.005,87
Nov, 2009	IDAN0310	Repayment		81.084,39
Nov, 2009	IDAN0310	Cancellations	170.268,45

Is this an error of the WB? Unlikely, as there are more than 200 Project_id containing negative disbursment amounts. What is your opinion?
*/
drop if transactionvalue<0 //4346 out of 149848 transaction coded as missing (about 3%)

renvars projectid year transactionvalue/  project_id transaction_year transaction_value
keep if financier=="IDA"
keep project_id transaction_year transaction_value
drop if transaction_year!=`i'
egen transaction_value_tot=total(transaction_value), by( project_id)  
label variable transaction_value_tot "Total value per project per year"
* Generate count variable for number of positive project disbursements
gen count=1 if transaction_value>0  
egen Disbursementcount=total(count), by(project_id transaction_year)
label var Disbursementcount "Sum of yearly positive disbursements within project" 
drop transaction_value
/*XXXXXX Melvin 29.12.2017: @Lennart, do you happen to know why some projects 
are not matched? e.g. project P008275 in the year1995 with 6 disbursments.
*/
collapse (mean) transaction_value_tot Disbursementcount, by(project_id transaction_year)

merge 1:m project_id using "alg.dta", nogen keep(3 1)

* XXXXXXXXX Lennart 04.01.2018: The part below is to a large extent recoded. Reviewing is, hence, necessary. Thanks!
/* 
Now, allocate aid flows that do not correspond to a certain administrative area in the following way
If there are 
5 locations, where 3 are geocoded on precision level 1-3 and 1 is geocoded on precision level 4, 1 is coded less precisely
Projectsum is X
Take 4/5*X as the amount to be totally allocated, thus 1/5X is lost in the data
Location weighted
- Each of the 3 ADM2 regions coded with precision 1-3  gets 1/5*X
- The ADM1 region coded with precision 4 gets 1/5*X and this is then distributed equally amongst the corresponding ADM2 regions
Pop weighted
- Each of the 3 ADM2 regions coded with precision 1-3 gets Pop(i)/(Sum Pop)*4/5*X 
- The ADM1 region coded with precision 4 gets Pop(i)/(Sum Pop)*4/5*X and this is then distributed equally amongst the corresponding ADM2 regions

An analogous approach is applied to the disbursementcount / transaction count (e.g., the number of projectwise transactions from the IDA account to the project). Although the interpretation might not be intuitive (e.g., a project
with 2 transactions might have 10 locations, so we have a disbursementcount of 0.2), it is most closely comparable to the USD amounts. Alternatively, we could think about numbers of active projects.
Example code
gen temp_totlocation =																//Number of locations of entire project
gen temp_totcoded = 																//Number of locations that are precisely coded (higher than precision level 5)
gen temp_totcoded4 = 																//Number of locations that are precisely coded (precision level 4)
gen temp_totcoded13 = 																//Number of locations that are precisely coded (higher than precision level 4)
gen temp_projsum = temp_totcoded/temp_totlocation*transaction_value_tot				//Total amount of project amount to be allocated to different regions
*/

//temp_totlocation: Number of locations with positive project disbursements for entire project year
gen count=1
egen temp_totlocation=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<=4
egen temp_totcoded=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are coded with precision 4
gen count=1 if precision_N100==4
egen temp_totcoded4=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<4
egen temp_totcoded13=total(count), by(project_id transaction_year)
drop count

//temp_projsum: Total amount of project amount to be allocated to different regions after discounting for information loss
rename transaction_value_tot temp_value																
gen transaction_value_tot= temp_totcoded/temp_totlocation*temp_value		

* Create sectoral disbursements (& counts)
*XXXXXX Melvin 29.12.2017: @Lennart, what is the purpose of this? Could you comment the steps? e.g. why do disbursemnt count sum up?
* XXXXX Lennart 03.01.2018: @ Melvin: I now coded it as a tempfile, so the transformation stays more tracable 
forvalues g=1(1)5 {
*XXXXXX Melvin 05.01.2018: @Lennart, could you ellaborate on the following line?
gen aux`g'pct=sector`g'pct*transaction_value_tot*0.01
}
* Sum up disbursement amounts of different purposes as these are ranked by percentage share in total disbursement (e.g., sometimes education might be mjsector1 for a schooling project, but for the next project of a new apprenticeship program only mjsector2)
foreach g in AX BX CX EX FX JX LX TX WX YX{
gen Disbursementcount_`g'=0
forvalues t=1(1)5 {
gen aux`t'=0
replace aux`t'=aux`t'pct if mjsector`t'code=="`g'"
replace Disbursementcount_`g'=Disbursementcount_`g'+Disbursementcount if mjsector`t'code=="`g'"
}
* XXXX Lennart 03.01.2018: Add all sectoral shares times total disbursement amount up as we need to go through the whole ranking (e.g., sometimes sanitation is the first sector, but sometimes only the fifth). 
gen transaction_value_tot_`g'=aux1+aux2+aux3+aux4+aux5
drop aux1 aux2 aux3 aux4 aux5
}
drop aux*
save `i'.dta, replace 
}

* Put yearly disbursements together
clear
use 1995.dta, clear
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep if (precision_N100<=4) //note: more than 92% are coded higher than 4 at this stage
sort project_id transaction_year
save "IDA_disbursement.dta", replace

********************************************************************************
//2) Prepare location weighted data
********************************************************************************
cd "$cwd" 
* Execute program to allocate IDA aid  using location weights
allocAid IDA WBAID


**********************************************************************	
//3) Prepare population weighted data
**********************************************************************
cd "$cwd" 
* Execute program to allocate IDA aid using population weights
allocAid_pop IDA WBAID



/*
****************************************************
* Generate location weighted Aid in adjacent regions 
****************************************************
** ADM2
* Load adjacency matrix for ADM2 regions
import excel using "$data\ADM\adm2_neighbors.xls", firstrow clear  //This dataset was created via adjacent_adm_classification.py. We use this adjacency matrix to match each ADM region with the disbursements in adjacent ADM regions.
* Drop Adjacent Regions in other country
drop if src_Name_0C75!= nbr_NAME_0C75
rename nbr_ID_ADMC12 ID_adm2
save "$data\ADM\adm2_neighbors.dta", replace

use "$data\ADM\1_1_1_R_pop_GADM1.dta", clear
renvars year rid1 isum_pop / transaction_year ID_adm1 isum_pop_ADM1
save ADM1POP.dta, replace

use "$data\ADM\1_1_1_R_pop_GADM2.dta", clear
renvars year rid2 isum_pop / transaction_year ID_adm2 isum_pop_ADM2
save ADM2POP.dta, replace


* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IDA_disbursement_ADM2_tsfill.dta", clear // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.
drop if transaction_year!=`i' 
* Mege with adjacent region
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Merge with data on population 
merge m:1 ID_adm2 transaction_year using `ADM2POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM2* Disbursementcount* isum_pop_ADM2, by(src_ID_admC12 transaction_year)
renvars isum_pop_ADM2 src_ID_admC12 / Population_ADM2_ADJ ID_adm2
duplicates report ID_adm2
* Rename Variables to indicate that they are in the adjacent regions
renvars WBAID_ADM2 WBAID_ADM2_1loc / WBAID_ADM2_ADJ WBAID_ADM2_1loc_ADJ
rename Disbursementcount_ADM2 Disbursementcount_ADM2_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM2_`g' WBAID_ADM2_1loc_`g' / WBAID_ADM2_ADJ_`g'  WBAID_ADM2_1loc_ADJ_`g'
rename Disbursementcount_ADM2_`g' Disbursementcount_ADM2_ADJ_`g'
}
tempfile `i'
save `i', replace 
}
* Put yearly disbursements in adjacent regions together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'
}

keep ID_adm2 WBAID_ADM2_*ADJ* Population_ADM2_ADJ transaction_year Disbursementcount_ADM2*

* Merge Disbursement file with Disbursements in adjacent ADM2 regions
merge 1:1 ID_adm2 transaction_year using "IDA_disbursement_ADM2_tsfill.dta", nogen keep(2 3)  
* Label variables
label var Population_ADM2_ADJ "Population in all adjacent ADM2 Regions"
label var WBAID_ADM2_ADJ "World Bank aid in all adjacent ADM2 regions"
label var WBAID_ADM2_1loc_ADJ "World Bank aid allocated to only 1 project location in all adjacent ADM2 regions"
label var Disbursementcount_ADM2_ADJ "No. of non-negative WB aid disbursements in adjacent ADM2 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM2_ADJ_`g' "World Bank aid in all adjacent ADM2 regions in sector `g'"
label var WBAID_ADM2_1loc_ADJ_`g' "World Bank aid allocated to only 1 project location in all adjacent ADM2 regions in sector `g'"
label var Disbursementcount_ADM2_ADJ_`g' "No. of non-negative WB aid disbursements in adjacent ADM2 regions in sector `g'"
}
* Rename Variables as location weighted
renvars WBAID_ADM2 WBAID_ADM2_ADJ / WBAID_ADM2_LOC WBAID_ADM2_LOC_ADJ
foreach x in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM2_`x' WBAID_ADM2_ADJ_`x' / WBAID_ADM2_LOC_`x' WBAID_ADM2_LOC_ADJ_`x'
}
save "IDA_disbursement_ADM2_adjacent.dta", replace 


** ADM1
* Load adjacency matrix for ADM1 regions
import excel using "$data\ADM\adm1_neighbors.xls", firstrow clear
renvars src_ID_0N100 src_ID_1N100  nbr_ID_0N100 nbr_ID_1N100 / src_id_0 src_id_1 nbr_id_0 nbr_id_1
*create unique region ids
gen c = "c"
gen r = "r"
egen src_ID_admC7 = concat(c src_id_0 r src_id_1)
egen nbr_ID_admC7 = concat(c nbr_id_0 r nbr_id_1)
drop c r
rename nbr_ID_admC7 ID_adm1

* Drop Adjacent Regions in other country
drop if src_Name_0C75!= nbr_NAME_0C75
save "$data\ADM\adm1_neighbors.dta", replace

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IDA_disbursement_ADM1_tsfill.dta", clear     // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.         
drop if transaction_year!=`i'
* Merge with adjacent regions						
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(1 3)
* Merge with population Data
merge m:1 ID_adm1 transaction_year using `ADM1POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM1* Disbursementcount* isum_pop_ADM1, by(src_ID_admC7 transaction_year)
* Rename Variables to indicate that they are in the adjacent regions
renvars isum_pop_ADM1 src_ID_admC7 / Population_ADM1_ADJ ID_adm1
renvars WBAID_ADM1 WBAID_ADM1_1loc / WBAID_ADM1_ADJ WBAID_ADM1_1loc_ADJ
rename Disbursementcount_ADM1 Disbursementcount_ADM1_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM1_`g' WBAID_ADM1_1loc_`g' / WBAID_ADM1_ADJ_`g'  WBAID_ADM1_1loc_ADJ_`g'
rename Disbursementcount_ADM1_`g' Disbursementcount_ADM1_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta
keep ID_adm1 WBAID_ADM1_*ADJ*  Disbursementcount* transaction_year Population_ADM1_ADJ
* Merge Disbursement file with Disbursements in adjacent ADM1 regions
merge 1:m ID_adm1 transaction_year using "IDA_disbursement_ADM1_tsfill.dta", nogen keep(2 3)
* Label all variables
label var Population_ADM1_ADJ "Population in neighboring regions"
label var WBAID_ADM1_ADJ "World Bank aid in all adjacent ADM1 regions"
label var WBAID_ADM1_1loc_ADJ "World Bank aid allocated to only 1 project location in all adjacent ADM2 regions"
label var Disbursementcount_ADM1_ADJ "No. of non-negative WB aid disbursements in adjacent ADM1 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM1_ADJ_`g' "World Bank aid in all adjacent ADM1 regions in sector `g'"
label var WBAID_ADM1_1loc_ADJ_`g' "World Bank aid allocated to only 1 project in all adjacent ADM1 regions in sector `g'"
label var Disbursementcount_ADM1_ADJ_`g' "No. of non-negative WB aid disbursements in adjacent ADM1 regions in sector `g'"
}
* Rename Variables as location weighted
renvars WBAID_ADM1 WBAID_ADM1_ADJ / WBAID_ADM1_LOC WBAID_ADM1_LOC_ADJ
foreach x in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM1_`x' WBAID_ADM1_ADJ_`x' / WBAID_ADM1_LOC_`x' WBAID_ADM1_LOC_ADJ_`x'
}
save "IDA_disbursement_ADM1_adjacent.dta", replace


****************************************************
* Generate population weighted Aid in adjacent regions 
****************************************************
** ADM2

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IDA_disbursement_ADM2_Wpop.dta", clear // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.
drop if transaction_year!=`i' 
* Mege with adjacent region
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Merge with population Data
merge m:1 ID_adm2 transaction_year using `ADM2POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM2* Disbursementcount* isum_pop_ADM2, by(src_ID_admC12 transaction_year)
renvars isum_pop_ADM2 src_ID_admC12 / Population_ADM2_ADJ ID_adm2
duplicates report ID_adm2
* Rename Variables to indicate that they are in the adjacent regions
rename WBAID_ADM2_Wpop WBAID_ADM2_Wpop_ADJ
rename Disbursementcount_ADM2 Disbursementcount_ADM2_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
rename WBAID_ADM2_Wpop_`g' WBAID_ADM2_Wpop_ADJ_`g'
rename Disbursementcount_ADM2_`g' Disbursementcount_ADM2_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements in adjacent regions together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep ID_adm2 WBAID_ADM2_Wpop_ADJ* transaction_year Disbursementcount_ADM2* Population_ADM2_ADJ

* Merge Disbursement file with Disbursements in adjacent ADM2 regions
merge 1:1 ID_adm2 transaction_year using "IDA_disbursement_ADM2_Wpop.dta", nogen keep(2 3)  
* Label variables
label var Population_ADM2_ADJ "Population in neighboring regions"
label var WBAID_ADM2_Wpop_ADJ "Pop. weighted World Bank aid in all adjacent ADM2 regions"
label var Disbursementcount_ADM2_ADJ "No. of non-negative WB aid disbursements in adjacent ADM2 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM2_Wpop_ADJ "Pop. Weighted World Bank aid in all adjacent ADM2 regions in sector `g'"
label var Disbursementcount_ADM2_`g' "No. of non-negative WB aid disbursements in adjacent ADM2 regions in sector `g'"
}
save "IDA_disbursement_ADM2_Wpop_adjacent.dta", replace 


** ADM1

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IDA_disbursement_ADM1_Wpop.dta", clear     // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.         
drop if transaction_year!=`i'
* Merge with adjacent regions						
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(1 3)
* Merge with population Data
merge m:1 ID_adm1 transaction_year using `ADM1POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM1* Disbursementcount* isum_pop_ADM1, by(src_ID_admC7 transaction_year)
* Rename Variables to indicate that they are in the adjacent regions
rename src_ID_admC7 ID_adm1
renvars isum_pop_ADM1 WBAID_ADM1_Wpop / Population_ADM1_ADJ WBAID_ADM1_Wpop_ADJ
rename Disbursementcount_ADM1 Disbursementcount_ADM1_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
rename WBAID_ADM1_Wpop_`g' WBAID_ADM1_Wpop_ADJ_`g'
rename Disbursementcount_ADM1_`g' Disbursementcount_ADM1_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep ID_adm1 WBAID_ADM1_Wpop_ADJ*  Disbursementcount* transaction_year Population_ADM1_ADJ
* Merge Disbursement file with Disbursements in adjacent ADM1 regions
merge 1:m ID_adm1 transaction_year using "IDA_disbursement_ADM1_Wpop.dta", nogen keep(2 3)
* Label all variables
label var Population_ADM1_ADJ "Population in neighboring regions"
label var WBAID_ADM1_Wpop_ADJ "Pop. Weighted World Bank aid in all adjacent ADM1 regions"
label var Disbursementcount_ADM1_ADJ "No. of non-negative WB aid disbursements in adjacent ADM1 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM1_Wpop_ADJ_`g' "Pop. Weighted World Bank aid in all adjacent ADM1 regions in sector `g'"
label var Disbursementcount_ADM1_ADJ_`g' "No. of non-negative WB aid disbursements in adjacent ADM1 regions in sector `g'"
}
save "IDA_disbursement_ADM1_Wpop_adjacent.dta", replace
*/

*******************************
* B: Create IBRD Finance Data *
*******************************


cd "$cwd"

*XXXXXX Melvin 09.01.2018: Copied the whole section from IDA above
* Create yearly disbursements (only until 2012 as we do not have disbursement data in subsequent years)
forvalues i=1995(1)2012 {
import excel "$data\Aid\IDA_IBRD_transactions.xlsx", firstrow clear

drop if transactionvalue<0 //4346 out of 149848 transaction coded as missing (about 3%)

renvars projectid year transactionvalue/  project_id transaction_year transaction_value
keep if financier=="IBRD"
keep project_id transaction_year transaction_value
drop if transaction_year!=`i'
egen transaction_value_tot=total(transaction_value), by( project_id)  
label variable transaction_value_tot "Total value per project per year"
* Generate count variable for number of positive project disbursements
gen count=1 if transaction_value>0  
egen Disbursementcount=total(count), by(project_id transaction_year)
label var Disbursementcount "Sum of yearly positive disbursements within project" 
drop transaction_value

collapse (mean) transaction_value_tot Disbursementcount, by(project_id transaction_year)
merge 1:m project_id using "alg.dta", nogen keep(3 1)

//temp_totlocation: Number of locations with positive project disbursements for entire project year
gen count=1
egen temp_totlocation=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<=4
egen temp_totcoded=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are coded with precision 4
gen count=1 if precision_N100==4
egen temp_totcoded4=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<4
egen temp_totcoded13=total(count), by(project_id transaction_year)
drop count

//temp_projsum: Total amount of project amount to be allocated to different regions after discounting for information loss
rename transaction_value_tot temp_value																
gen transaction_value_tot= temp_totcoded/temp_totlocation*temp_value		

* Create sectoral disbursements (& counts)
forvalues g=1(1)5 {
gen aux`g'pct=sector`g'pct*transaction_value_tot*0.01
}
* Sum up disbursement amounts of different purposes as these are ranked by percentage share in total disbursement (e.g., sometimes education might be mjsector1 for a schooling project, but for the next project of a new apprenticeship program only mjsector2)
foreach g in AX BX CX EX FX JX LX TX WX YX{
gen Disbursementcount_`g'=0
forvalues t=1(1)5 {
gen aux`t'=0
replace aux`t'=aux`t'pct if mjsector`t'code=="`g'"
replace Disbursementcount_`g'=Disbursementcount_`g'+Disbursementcount if mjsector`t'code=="`g'"
}
* Add all sectoral shares times total disbursement amount up as we need to go through the whole ranking (e.g., sometimes sanitation is the first sector, but sometimes only the fifth). 
gen transaction_value_tot_`g'=aux1+aux2+aux3+aux4+aux5
drop aux1 aux2 aux3 aux4 aux5
}
drop aux*
save `i'.dta, replace 
}

* Put yearly disbursements together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta
keep if (precision_N100<=4)
sort project_id transaction_year
save "IBRD_disbursement.dta", replace

********************************************************************************
//2) Prepare location weighted data
********************************************************************************
cd "$cwd" 
* Execute program to allocate IBRD aid using location weights
allocAid IBRD IBRD


**********************************************************************	
//3) Prepare population weighted data
**********************************************************************
cd "$cwd" 
* Execute program to allocate IBRD aid using population weights
allocAid_pop IBRD IBRD





	
	
/*
****************************************************
* Generate location weighted Aid in adjacent regions 
****************************************************
** ADM2
* Load adjacency matrix for ADM2 regions
import excel using "$data\ADM\adm2_neighbors.xls", firstrow clear  //This dataset was created via adjacent_adm_classification.py. We use this adjacency matrix to match each ADM region with the disbursements in adjacent ADM regions.
* Drop Adjacent Regions in other country
drop if src_Name_0C75!= nbr_NAME_0C75
rename nbr_ID_ADMC12 ID_adm2
save "$data\ADM\adm2_neighbors.dta", replace

use "$data\ADM\1_1_1_R_pop_GADM1.dta", clear
renvars year rid1 isum_pop / transaction_year ID_adm1 isum_pop_ADM1
save ADM1POP.dta, replace

use "$data\ADM\1_1_1_R_pop_GADM2.dta", clear
renvars year rid2 isum_pop / transaction_year ID_adm2 isum_pop_ADM2
save ADM2POP.dta, replace

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IBRD_disbursement_ADM2_tsfill.dta", clear // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.
drop if transaction_year!=`i' 
* Mege with adjacent region
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Merge with data on population 
merge m:1 ID_adm2 transaction_year using `ADM2POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM2* Disbursementcount* isum_pop_ADM2, by(src_ID_admC12 transaction_year)
renvars isum_pop_ADM2 src_ID_admC12 / Population_ADM2_ADJ ID_adm2
duplicates report ID_adm2
* Rename Variables to indicate that they are in the adjacent regions
renvars WBAID_ADM2 WBAID_ADM2_1loc / WBAID_ADM2_ADJ WBAID_ADM2_1loc_ADJ
rename Disbursementcount_ADM2 Disbursementcount_ADM2_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM2_`g' WBAID_ADM2_1loc_`g' / WBAID_ADM2_ADJ_`g'  WBAID_ADM2_1loc_ADJ_`g'
rename Disbursementcount_ADM2_`g' Disbursementcount_ADM2_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements in adjacent regions together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep ID_adm2 WBAID_ADM2_*ADJ* Population_ADM2_ADJ transaction_year Disbursementcount_ADM2*

* Merge Disbursement file with Disbursements in adjacent ADM2 regions
merge 1:1 ID_adm2 transaction_year using "IBRD_disbursement_ADM2_tsfill.dta", nogen keep(2 3)  
* Label variables
label var Population_ADM2_ADJ "Population in all adjacent ADM2 Regions"
label var WBAID_ADM2_ADJ "World Bank aid in all adjacent ADM2 regions"
label var WBAID_ADM2_1loc_ADJ "World Bank aid allocated to only 1 project location in all adjacent ADM2 regions"
label var Disbursementcount_ADM2_ADJ "No. of non-negative WB aid disbursements in adjacent ADM2 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM2_ADJ_`g' "World Bank aid in all adjacent ADM2 regions in sector `g'"
label var WBAID_ADM2_1loc_ADJ_`g' "World Bank aid allocated to only 1 project location in all adjacent ADM2 regions in sector `g'"
label var Disbursementcount_ADM2_ADJ_`g' "No. of non-negative WB aid disbursements in adjacent ADM2 regions in sector `g'"
}
* Rename Variables as location weighted
renvars WBAID_ADM2 WBAID_ADM2_ADJ / WBAID_ADM2_LOC WBAID_ADM2_LOC_ADJ
foreach x in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM2_`x' WBAID_ADM2_ADJ_`x' / WBAID_ADM2_LOC_`x' WBAID_ADM2_LOC_ADJ_`x'
}
save "IBRD_disbursement_ADM2_adjacent.dta", replace 


** ADM1
* Load adjacency matrix for ADM1 regions
import excel using "$data\ADM\adm1_neighbors.xls", firstrow clear
renvars src_ID_0N100 src_ID_1N100  nbr_ID_0N100 nbr_ID_1N100 / src_id_0 src_id_1 nbr_id_0 nbr_id_1
*create unique region ids
gen c = "c"
gen r = "r"
egen src_ID_admC7 = concat(c src_id_0 r src_id_1)
egen nbr_ID_admC7 = concat(c nbr_id_0 r nbr_id_1)
drop c r
rename nbr_ID_admC7 ID_adm1

* Drop Adjacent Regions in other country
drop if src_Name_0C75!= nbr_NAME_0C75
save "$data\ADM\adm1_neighbors.dta", replace

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IBRD_disbursement_ADM1_tsfill.dta", clear     // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.         
drop if transaction_year!=`i'
* Merge with adjacent regions						
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(1 3)
* Merge with population Data
merge m:1 ID_adm1 transaction_year using `ADM1POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM1* Disbursementcount* isum_pop_ADM1, by(src_ID_admC7 transaction_year)
* Rename Variables to indicate that they are in the adjacent regions
renvars isum_pop_ADM1 src_ID_admC7 / Population_ADM1_ADJ ID_adm1
renvars WBAID_ADM1 WBAID_ADM1_1loc / WBAID_ADM1_ADJ WBAID_ADM1_1loc_ADJ
rename Disbursementcount_ADM1 Disbursementcount_ADM1_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM1_`g' WBAID_ADM1_1loc_`g' / WBAID_ADM1_ADJ_`g'  WBAID_ADM1_1loc_ADJ_`g'
rename Disbursementcount_ADM1_`g' Disbursementcount_ADM1_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep ID_adm1 WBAID_ADM1_*ADJ*  Disbursementcount* transaction_year Population_ADM1_ADJ
* Merge Disbursement file with Disbursements in adjacent ADM1 regions
merge 1:m ID_adm1 transaction_year using "IBRD_disbursement_ADM1_tsfill.dta", nogen keep(2 3)
* Label all variables
label var Population_ADM1_ADJ "Population in neighboring regions"
label var WBAID_ADM1_ADJ "World Bank aid in all adjacent ADM1 regions"
label var WBAID_ADM1_1loc_ADJ "World Bank aid allocated to only 1 project location in all adjacent ADM2 regions"
label var Disbursementcount_ADM1_ADJ "No. of non-negative WB aid disbursements in adjacent ADM1 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM1_ADJ_`g' "World Bank aid in all adjacent ADM1 regions in sector `g'"
label var WBAID_ADM1_1loc_ADJ_`g' "World Bank aid allocated to only 1 project in all adjacent ADM1 regions in sector `g'"
label var Disbursementcount_ADM1_ADJ_`g' "No. of non-negative WB aid disbursements in adjacent ADM1 regions in sector `g'"
}
* Rename Variables as location weighted
renvars WBAID_ADM1 WBAID_ADM1_ADJ / WBAID_ADM1_LOC WBAID_ADM1_LOC_ADJ
foreach x in AX BX CX EX FX JX LX TX WX YX{
renvars WBAID_ADM1_`x' WBAID_ADM1_ADJ_`x' / WBAID_ADM1_LOC_`x' WBAID_ADM1_LOC_ADJ_`x'
}
save "IBRD_disbursement_ADM1_adjacent.dta", replace


****************************************************
* Generate population weighted Aid in adjacent regions 
****************************************************
** ADM2

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IBRD_disbursement_ADM2_Wpop.dta", clear // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.
drop if transaction_year!=`i' 
* Mege with adjacent region
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Merge with population Data
merge m:1 ID_adm2 transaction_year using `ADM2POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM2* Disbursementcount* isum_pop_ADM2, by(src_ID_admC12 transaction_year)
renvars isum_pop_ADM2 src_ID_admC12 / Population_ADM2_ADJ ID_adm2
duplicates report ID_adm2
* Rename Variables to indicate that they are in the adjacent regions
rename WBAID_ADM2_Wpop WBAID_ADM2_Wpop_ADJ
rename Disbursementcount_ADM2 Disbursementcount_ADM2_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
rename WBAID_ADM2_Wpop_`g' WBAID_ADM2_Wpop_ADJ_`g'
rename Disbursementcount_ADM2_`g' Disbursementcount_ADM2_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements in adjacent regions together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep ID_adm2 WBAID_ADM2_Wpop_ADJ* transaction_year Disbursementcount_ADM2* Population_ADM2_ADJ

* Merge Disbursement file with Disbursements in adjacent ADM2 regions
merge 1:1 ID_adm2 transaction_year using "IBRD_disbursement_ADM2_Wpop.dta", nogen keep(2 3)  
* Label variables
label var Population_ADM2_ADJ "Population in neighboring regions"
label var WBAID_ADM2_Wpop_ADJ "Pop. weighted World Bank aid in all adjacent ADM2 regions"
label var Disbursementcount_ADM2_ADJ "No. of non-negative WB aid disbursements in adjacent ADM2 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM2_Wpop_ADJ "Pop. Weighted World Bank aid in all adjacent ADM2 regions in sector `g'"
label var Disbursementcount_ADM2_`g' "No. of non-negative WB aid disbursements in adjacent ADM2 regions in sector `g'"
}
save "IBRD_disbursement_ADM2_Wpop_adjacent.dta", replace 


** ADM1

* Merge Adjacency matrix with Aid Disbursements in adjacent regions
forvalues i=1995(1)2012 {
use "IBRD_disbursement_ADM1_Wpop.dta", clear     // The disbursements are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the WB Aid in adjacent regions.         
drop if transaction_year!=`i'
* Merge with adjacent regions						
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(1 3)
* Merge with population Data
merge m:1 ID_adm1 transaction_year using `ADM1POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) WBAID_ADM1* Disbursementcount* isum_pop_ADM1, by(src_ID_admC7 transaction_year)
* Rename Variables to indicate that they are in the adjacent regions
rename src_ID_admC7 ID_adm1
renvars isum_pop_ADM1 WBAID_ADM1_Wpop / Population_ADM1_ADJ WBAID_ADM1_Wpop_ADJ
rename Disbursementcount_ADM1 Disbursementcount_ADM1_ADJ
foreach g in AX BX CX EX FX JX LX TX WX YX{
rename WBAID_ADM1_Wpop_`g' WBAID_ADM1_Wpop_ADJ_`g'
rename Disbursementcount_ADM1_`g' Disbursementcount_ADM1_ADJ_`g'
}
save `i'.dta, replace 
}
* Put yearly disbursements together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta

keep ID_adm1 WBAID_ADM1_Wpop_ADJ*  Disbursementcount* transaction_year Population_ADM1_ADJ
* Merge Disbursement file with Disbursements in adjacent ADM1 regions
merge 1:m ID_adm1 transaction_year using "IBRD_disbursement_ADM1_Wpop.dta", nogen keep(2 3)
* Label all variables
label var Population_ADM1_ADJ "Population in neighboring regions"
label var WBAID_ADM1_Wpop_ADJ "Pop. Weighted World Bank aid in all adjacent ADM1 regions"
label var Disbursementcount_ADM1_ADJ "No. of non-negative WB aid disbursements in adjacent ADM1 regions"
foreach g in AX BX CX EX FX JX LX TX WX YX{
label var WBAID_ADM1_Wpop_ADJ_`g' "Pop. Weighted World Bank aid in all adjacent ADM1 regions in sector `g'"
label var Disbursementcount_ADM1_ADJ_`g' "No. of non-negative WB aid disbursements in adjacent ADM1 regions in sector `g'"
}
save "IBRD_disbursement_ADM1_Wpop_adjacent.dta", replace
*/
**************************
**************************
* C: Create Chinese Aid Data *
**************************
**************************

cd "$cwd"



*******************
* Load Project Data
********************
import excel using "$data\Aid_China\aiddata_china_1_1_1.xlsx", sheet("1) Official Finance") firstrow clear
rename year transaction_year
save OF.dta, replace

*********************
* Load GADM-Aid Data
*********************
* Load ADM1 Data for the cases, where no ADM2 shapefile existed
import delim using "$data\Aid_China\spatial_join_adm1_chinese_aid.csv", clear
//generate local of ChinaAid countries to drop non ChinaAid countries at a later stage
levelsof id_0n100, local(ChinaAidCountries)

* Keep ID1 Identifier to merge these into ADM2 Data
keep target_fidn100 id_0n100 id_1n100 join_fidn100 isoc3 name_0c75 name_1c75
save adm1_v.dta, replace

**********************************
* Load ADM2 data
import delim using "$data\Aid_China\spatial_join_adm2_chinese_aid.csv", clear
drop id_0n100 isoc3 name_0c75 id_1n100 name_1c75  //clear entries from errors if no ADM2 regions identified; otherwise missing id_0 and id_1 entries

* Merge with the ADM1 Identifiers as there are some ADM2 regions missing and in this case the ADM1 region is also not coded (This issue seems persistent for two observations, "which fall into the sea")
merge 1:1 target_fidn100 using adm1_v.dta, nogen keep(1 3)

*create dummy variable indicating if a GADM2 region is missing, thus have been replaced by GADM1 region
gen byte d_miss_ADM2=(id_2n100==0 & id_1n100!=0)

renvars project_idn100 year_n100/  project_id transaction_year

//Manuall code projects that are identified in the ocean, but are acutally precisely coded, which is >=precision4 (See protocol from 03-09-2017 last pages)
replace id_0n100= 87 if project_id==1468 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 5 if project_id==1468 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 72 if project_id==1468 & join_fidn100==-1 & id_2n100==0
replace isoc3= "GHA" if project_id==1468 & join_fidn100==-1
replace name_0c75= "Ghana" if project_id==1468 & join_fidn100==-1
replace name_1c75= "Greater Accra" if project_id==1468 & join_fidn100==-1
replace name_2c75= "Dangbe West" if project_id==1468 & join_fidn100==-1


replace id_0n100= 203 if project_id==2081 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 17 if project_id==2081 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 0 if project_id==2081 & join_fidn100==-1 & id_2n100==0
replace isoc3= "SYC" if project_id==2081 & join_fidn100==-1
replace name_0c75= "Seychelles" if project_id==2081 & join_fidn100==-1
replace name_1c75= "Les Mamelles" if project_id==2081 & join_fidn100==-1 & name_1c75==""
replace d_miss_ADM2=1 if project_id==2081

replace id_0n100= 203 if project_id==1161 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 22 if project_id==1161 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 0 if project_id==1161 & join_fidn100==-1 & id_2n100==0
replace isoc3= "SYC" if project_id==1161 & join_fidn100==-1
replace name_0c75= "Seychelles" if project_id==1161 & join_fidn100==-1
replace name_1c75= "Pointe Larue" if project_id==1161 & join_fidn100==-1 & name_1c75=="" & precision_n100==4
replace d_miss_ADM2=1 if project_id==1161

replace id_0n100= 118 if project_id==1291 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 28 if project_id==1291 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 182 if project_id==1291 & join_fidn100==-1 & id_2n100==0 // Melvin 18.01.2018: Manually looked up geocode. Likely to belong to Mombasa-Mvita region
replace isoc3= "KEN" if project_id==1291 & join_fidn100==-1
replace name_0c75= "Kenya" if project_id==1291 & join_fidn100==-1
replace name_1c75= "Mombasa" if project_id==1291 & join_fidn100==-1
replace name_2c75= "Mvita" if project_id==1291 & join_fidn100==-1


* Generate unique identifier for each ADM region:
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0n100 r id_1n100)
egen ID_adm2 = concat(c id_0n100 r id_1n100 r id_2n100)
drop c r  
label var ID_adm1 "Unique identifier for ADM1 region"
label var ID_adm2 "Unique identifier for ADM2 region"

* Merge the location data (master) with actual flow data (using)
merge m:1 project_id transaction_year using OF.dta, nogen //no mismatch from master

* Clean Data 
keep id_2n100 id_1n100 id_0n100 isoc3 precision_n100 d_miss_ADM2 name_0c75 name_1c75 name_2c75 project_id transaction_year titlec254 year_uncerc254 crs_sectorn100 crs_sect_1c254 sector_comc254 statusc254 status_codn100 flowc254 donor_ag_1n100 verifiedc254 flow_classc254 flow_cla_1n100 intentc254 activec254 start_actuc254 start_planc254 end_actualc254 end_plannec254 loan_typec254 line_of_crc254 is_cofinanc254 is_ground_c254 is_offician100 ID_adm1 ID_adm2 flow sources_count amount currency deflators_used exchange_rates_used usd_defl usd_current verified_cn100
* Drop observations which only signify a pledge, but no full-fetched disbursement
drop if statusc254=="Pipeline: Pledge"

*** Melvin 28.02.2018: Decision taken on skype. Use ODA only
* Keep only Official Development Assistance (ODA) & Other Official Finance (OOF) flows
keep if  flow_classc254=="ODA-like" //| flow_classc254=="Vague (Official Finance)" | flow_classc254=="OOF-like"

*** XXX Lennart 04.06.2018: In the following lines sectoral codes are added analoguously to WB aid.
* Create sectoral indicators, which correspond (broadly) to World Bank Sector 
gen mjsector1code=""
replace mjsector1code="AX" if crs_sectorn100==310 // WB: "Agriculture, fishing, and forestry"	CRS/CHINA: " Agriculture, Forestry and Fishing"
replace mjsector1code="BX" if crs_sectorn100==150 // WB:  "Public Administration, Law, and Justice" CRS/CHINA:	"Government and Civil Society"
replace mjsector1code="CX" if crs_sectorn100==220 // WB: "Information and communications" CRS/CHINA:	"Communications"
replace mjsector1code="EX" if crs_sectorn100==110 // WB:  "Education"	 CRS/CHINA: "Education"
replace mjsector1code="FX" if crs_sectorn100==240 // WB:  "Finance"	 CRS/CHINA: "Banking and Financial Services"
replace mjsector1code="JX" if crs_sectorn100==120 | crs_sectorn100==160  // WB: "Health and other social services"	CRS/CHINA: "Health"	"Other Social infrastructure and services"	
replace mjsector1code="LX" if crs_sectorn100==230  // WB:  "Energy and mining"  CRS/CHINA: "Energy Generation and Supply"	
replace mjsector1code="TX" if crs_sectorn100==210 // WB: "Transportation" CRS/CHINA:	"Transport and Storage"
replace mjsector1code="WX" if crs_sectorn100==140  // WB:  "Water, sanitation and flood protection"	CRS/CHINA: "Water Supply and Sanitation"
replace mjsector1code="YX" if crs_sectorn100==330  | crs_sectorn100==320 // WB:  "Industry and Trade"	CRS/CHINA: "Trade and Tourism"		 "Industry, Mining, Construction"
replace mjsector1code="Other" if crs_sectorn100==420 | crs_sectorn100==998 | crs_sectorn100==920 |  crs_sectorn100==250 | crs_sectorn100==130 | crs_sectorn100==430 |  crs_sectorn100==530 | crs_sectorn100==510  | ///
 crs_sectorn100==700 | crs_sectorn100==520 | crs_sectorn100==600 // WB: All Allocated/NA; CRS: "Women in Development" ; "Unallocated / Unspecified" ; "Business and Other Services"; "Support to Non-governmental Org..." ; "Population Policies / Programmes and ..."; "Other Multisector" "Non-food commodity assistance"; "General Budget Support"; "Developmental Food Aid/Food Security A.." "Action relating to debt"
* Sum up disbursement amounts of different purposes as these are ranked by percentage share in total disbursement.
foreach g in AX BX CX EX FX JX LX TX WX YX{
gen usd_current_`g'=usd_current if mjsector1code=="`g'"
}


* rename data to fit program algorithm
renvars isoc3 id_0n100 id_1n100 id_2n100 precision_n100 name_0c75 name_1c75 name_2c75 / ISO3 ID_0 ID_1 ID_2 precision_N100 ADM0 ADM1 ADM2

save china_temp1.dta, replace

****next step: discount aid
use china_temp1.dta, clear
rename usd_current transaction_value_tot
drop if transaction_value==.  //963 out of 3117 obs missing (30%)

* Generate count variable for number of positive project disbursements
gen count=1 if transaction_value_tot>0
egen Disbursementcount=total(count), by(project_id transaction_year)
label var Disbursementcount "Sum of yearly positive disbursements within project" 
drop count

* Repeat creation of disbursementcounts at the sectoral level
foreach g in AX BX CX EX FX JX LX TX WX YX{
rename usd_current_`g' transaction_value_tot_`g' 
gen count_`g'=1 if transaction_value_tot_`g'>0 & transaction_value_tot_`g'!=.
egen Disbursementcount_`g'=total(count_`g'), by(project_id transaction_year)
label var Disbursementcount_`g' "Sum of yearly positive disbursements within project of sector `g'" 
drop count_`g'
}

//temp_totlocation: Number of locations with positive project disbursements for entire project year
gen count=1
egen temp_totlocation=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<=4
egen temp_totcoded=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are coded with precision 4
gen count=1 if precision_N100==4
egen temp_totcoded4=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<4
egen temp_totcoded13=total(count), by(project_id transaction_year)
drop count

//temp_projsum: Total amount of project amount to be allocated to different regions after discounting for information loss
rename transaction_value_tot temp_value																
gen transaction_value_tot= temp_totcoded/temp_totlocation*temp_value


keep if (precision_N100<=4) //note: more than 92% are coded higher than 4 at this stage

//rename more variables after merge
rename ID_0 ID_0N100
rename ID_1 ID_1N100
rename ID_2 ID_2N100


sort project_id transaction_year
save "China_disbursement.dta", replace

/* xxxxxxxxxxxx Melvin 26.01.2018
Note China_disbursement.dta contains

        flow_class,C,254 |      Freq.     Percent        Cum.
-------------------------+-----------------------------------
                ODA-like |        710       48.63       48.63
                OOF-like |        120        8.22       56.85
Vague (Official Finance) |        630       43.15      100.00
-------------------------+-----------------------------------
                   Total |      1,460      100.00

If want to have disbursement by type, need to keep  by flow_class and run programs
Previously used follwowing definition
gen CODA_ADM2_Wpop=CAID_Wpop_ADM2 if flow_classc254=="ODA-like"
gen COOF_ADM2_Wpop=CAID_Wpop_ADM2 if flow_classc254=="Vague (Official Finance)" | flow_classc254=="OOF-like"
*/

********************************************************************************
//2) Prepare location weighted data
********************************************************************************
cd "$cwd" 
* Execute program to allocate Chinese aid using location weights
allocAid China CAID
**********************************************************************	
//3) Prepare population weighted data
**********************************************************************
* Execute program to allocate Chinese aid using population weights
cd "$cwd" 
allocAid_pop China CAID









/*
****************************************************
* Generate location weighted Aid in adjacent regions 
****************************************************
use "$data\ADM\1_1_1_R_pop_GADM1.dta", clear
renvars year rid1 isum_pop / transaction_year ID_adm1 isum_pop_ADM1

save ADM1POP.dta, replace

use "$data\ADM\1_1_1_R_pop_GADM2.dta", clear
renvars year rid2 isum_pop / transaction_year ID_adm2 isum_pop_ADM2

save ADM2POP.dta, replace


* Need to do this on a yearly basis to prevent odd merges
forvalues i=2000(1)2012 {
use adm2.dta, clear
keep if transaction_year==`i'
* Mege with adjacent region
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Merge with data on population 
merge m:1 ID_adm2 transaction_year using `ADM2POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) CODA_ADM2_LOC COOF_ADM2_LOC CODA_ADM2_Wpop COOF_ADM2_Wpop isum_pop_ADM2, by(src_ID_admC12 transaction_year)
* Rename variables to indicate that they are in an adjacent region
foreach v in CODA_ADM2_LOC COOF_ADM2_LOC  CODA_ADM2_Wpop COOF_ADM2_Wpop {
rename `v' `v'_ADJ
}
renvars isum_pop_ADM2 src_ID_admC12 / Population_ADM2_ADJ ID_adm2
tempfile `i'
save ``i'', replace
}

* Put yearly disbursements in adjacent regions together
use `2000', clear
forvalues i=2001(1)2012 {
append using ``i''
}

* Label variables
label var CODA_ADM2_LOC_ADJ "Chinese ODA-like flows to adjacent regions (location weighted)"
label var CODA_ADM2_Wpop_ADJ "Chinese ODA-like flows to adjacent regions (population weighted)"
label var COOF_ADM2_LOC_ADJ "Chinese other official finance to adjacent regions (location weighted)"
label var COOF_ADM2_Wpop_ADJ "Chinese other official finance to adjacent regions (population weighted)"
label var Population_ADM2_ADJ "Population in adjacent regions"
* Merge Disbursements in adjacent regions with disbursements in specific region
merge 1:1 ID_adm2 transaction_year using `adm2', nogen 

//some regions do not receive aid, but the adjacent ones. The merge introduces missings for these regions. Code them as getting no aid.
foreach var of varlist CODA_* COOF_* {
replace `var'=0 if `var'==. 
}

save "$data\Aid\Chinese_Finance_ADM2_adjacent.dta", replace 


* ADM1 LEVEL
* Need to do this on a yearly basis to prevent odd merges
forvalues t=2000(1)2012 {
use adm1.dta, clear
keep if transaction_year==`t'
* Mege with adjacent region
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(1 3)
* Merge with data on population 
merge m:1 ID_adm1 transaction_year using ADM1POP.dta, nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) CODA_ADM1_LOC COOF_ADM1_LOC  CODA_ADM1_Wpop COOF_ADM1_Wpop isum_pop_ADM1, by(src_ID_admC7 transaction_year)
* Rename variables to indicate that they are in an adjacent region
foreach v in CODA_ADM1_LOC COOF_ADM1_LOC  CODA_ADM1_Wpop COOF_ADM1_Wpop {
rename `v' `v'_ADJ
}
renvars isum_pop_ADM1 src_ID_admC7 / Population_ADM1_ADJ ID_adm1

save `t'.dta, replace
}

* Put yearly disbursements in adjacent regions together
use 2000.dta, clear
forvalues t=2001(1)2012 {
append using `t'.dta
erase `t'.dta
}
erase 2000.dta
* Label variables
label var CODA_ADM1_LOC_ADJ "Chinese ODA-like flows to adjacent regions (location weighted)"
label var CODA_ADM1_Wpop_ADJ "Chinese ODA-like flows to adjacent regions (population weighted)"
label var COOF_ADM1_LOC_ADJ "Chinese other official finance to adjacent regions (location weighted)"
label var COOF_ADM1_Wpop_ADJ "Chinese other official finance to adjacent regions (population weighted)"
label var Population_ADM1_ADJ "Population in adjacent regions"
* Merge Disbursements in adjacent regions with disbursements in specific region
merge 1:1 ID_adm1 transaction_year using adm1.dta, nogen 


//some regions do not receive aid, but the adjacent ones. The merge introduces missings for these regions. Code them as getting no aid.
foreach var of varlist CODA_* COOF_* {
replace `var'=0 if `var'==. 
}

save "$data\Aid\Chinese_Finance_ADM1_adjacent.dta", replace 
*/

**************************
**************************
* D: Create Indian Aid Data *
**************************
**************************
cd "$cwd" 

* Load ADM1 Data for the cases, where no ADM2 shapefile existed
import delim using "$data\Aid_India\gis_out\i_alg2_adm1.csv", clear
//manually delete one error in raw IndianAid Data. Confirmed by Gerda Asmussen
*XXXXXXXXX 06.12.2017 Lennart: Why is it deleted and not corrected? We might have already talked about it and if it is one of those places, which fall into the water and/or drop out anyways, it should be no issue at all.
*XXXXXXXXX 29.12.2017 Melvin: Is is not a region in the water. Gerda told me that this point is wrongly coded in the raw data. The drop must remain.
drop if place_name=="Pochampally Handloom Park"
* Generate unique identifier for each ADM region:
gen c = "c"
gen r = "r"
egen ID_adm1_v = concat(c id_0 r id_1)
drop c r  
label var ID_adm1 "Unique identifier for ADM1 region"
keep target_fid ID_adm1_v join_fid id_1 id_0 name_0 iso id_1 name_1 
renvars join_fid / join_v
drop if iso==""
save adm1_v.dta, replace


* Load ADM2 data
import delim using "$data\Aid_India\gis_out\i_alg2_adm2.csv", clear

//rename variables
renvars aiddata_pr year /  project_id transaction_year
//manually delete one error in raw IndianAid Data. Confirmed by Gerda Asmus
drop if place_name=="Pochampally Handloom Park"
//clear entries from errors if no ADM2 regions identified; otherwise missing id_0 and id_1 entries
drop id_0 name_0 iso id_1 name_1 
merge 1:1 target_fid using adm1_v.dta, nogen keep(1 3)

//rename more variables after merge
rename name_0 ADM0
rename name_1 ADM1
rename name_2 ADM2
rename iso ISO3
rename id_0 ID_0N100
rename id_1 ID_1N100
rename id_2 ID_2N100
rename precision_ precision_N100
rename usd_commit transaction_value_tot //XXXXXX Melvin 27.01.2018: Only 12 project have disbursements. take committment instead

//create dummy variable indicating if a GADM2 region is missing, thus have been replaced by GADM1 region
gen byte d_miss_ADM2=(ID_2N100==0 & ID_1N100!=0)

* Generate unique identifier for each ADM region:
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c ID_0N100 r ID_1N100)
egen ID_adm2 = concat(c ID_0N100 r ID_1N100 r ID_2N100)
drop c r  
label var ID_adm1 "Unique identifier for ADM1 region"
label var ID_adm2 "Unique identifier for ADM2 region"

* Keep only Official Development Assistance (ODA) & Other Official Finance (OOF) flows, some projects have no flow_type specification
keep if  flow_type=="ODA" | flow_type=="OOF" | flow_type=="OOF-like Export Credit"


********************************************************************************
//1) Create yearly disbursements data discounted by information loss
********************************************************************************
drop if transaction_value_tot==.

gen count=1 if transaction_value_tot>0
egen Disbursementcount=total(count), by(project_id transaction_year)
label var Disbursementcount "Sum of yearly positive disbursements within project" 
drop count

//temp_totlocation: Number of locations with positive project disbursements for entire project year
gen count=1
egen temp_totlocation=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<=4
egen temp_totcoded=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are coded with precision 4
gen count=1 if precision_N100==4
egen temp_totcoded4=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<4
egen temp_totcoded13=total(count), by(project_id transaction_year)
drop count

//temp_projsum: Total amount of project amount to be allocated to different regions after discounting for information loss
rename transaction_value_tot temp_value																
gen transaction_value_tot= temp_totcoded/temp_totlocation*temp_value


keep if (precision_N100<=4) //note: more than 92% are coded higher than 4 at this stage
sort project_id transaction_year
save "India_disbursement.dta", replace




/* xxxxxxxxxxxx Melvin 26.01.2018
Note India_disbursement.dta contains


             flow_type |      Freq.     Percent        Cum.
-----------------------+-----------------------------------
                   ODA |         55       44.72       44.72
                   OOF |         66       53.66       98.37
OOF-like Export Credit |          2        1.63      100.00
-----------------------+-----------------------------------
                 Total |        123      100.00

previous coding
gen IODA_ADM2_Wpop=IAID_ADM2_Wpop if flow_type=="ODA"
gen IOOF_ADM2_Wpop=IAID_ADM2_Wpop if flow_type=="OOF" | flow_type=="OOF-like Export Credit"
gen IODA_ADM2_LOC=IAID_ADM2_LOC if flow_type=="ODA"
gen IOOF_ADM2_LOC=IAID_ADM2_LOC if flow_type=="OOF" | flow_type=="OOF-like Export Credit"

*/

********************************************************************************
//2) Prepare location weighted data
********************************************************************************
* Execute program to allocate Chinese aid using location weights
cd "$cwd" 
allocAid India IAID
**********************************************************************	
//3) Prepare population weighted data
**********************************************************************
* Execute program to allocate Chinese aid using population weights
cd "$cwd" 
allocAid_pop India IAID



/*
****************************************************
* Generate location weighted Aid in adjacent regions 
****************************************************
use "$data\ADM\1_1_1_R_pop_GADM1.dta", clear
renvars year rid1 isum_pop / transaction_year ID_adm1 isum_pop_ADM1
save ADM1POP.dta, replace

use "$data\ADM\1_1_1_R_pop_GADM2.dta", clear
renvars year rid2 isum_pop / transaction_year ID_adm2 isum_pop_ADM2
tempfile ADM2POP
save ADM2POP.dta, replace


* Need to do this on a yearly basis to prevent odd merges
use `adm2', clear
levelsof transaction_year, local(T)
foreach i in `T' {
use `adm2', clear
keep if transaction_year==`i'
* Mege with adjacent region
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Merge with data on population 
merge m:1 ID_adm2 transaction_year using `ADM2POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) IODA_ADM2_LOC IOOF_ADM2_LOC  IODA_ADM2_Wpop IOOF_ADM2_Wpop isum_pop_ADM2, by(src_ID_admC12 transaction_year)
* Rename variables to indicate that they are in an adjacent region
foreach v in IODA_ADM2_LOC IOOF_ADM2_LOC  IODA_ADM2_Wpop IOOF_ADM2_Wpop {
rename `v' `v'_ADJ
}
renvars isum_pop_ADM2 src_ID_admC12 / Population_ADM2_ADJ ID_adm2
save `i'.dta, replace
}

* Put yearly disbursements in adjacent regions together (here only 2010 is used as we do not have any aid disbursements in previous years 
* XXXXXXXXXXX 06.12.2017 Lennart: We need to discuss if we take Indian aid into account for main analyses. If we would really do this, we should also consider using the complete conflict sample (so far we use only data up to 2012 as we focus on WB aid).
use `2010', clear
forvalues t=2011(1)2014 {
append using `t'
erase `i'.dta
}
erase 2010.dta
* Label variables
label var IODA_ADM2_LOC_ADJ "Indian ODA-like flows to adjacent regions (location weighted)"
label var IODA_ADM2_Wpop_ADJ "Indian ODA-like flows to adjacent regions (population weighted)"
label var IOOF_ADM2_LOC_ADJ "Indian other official finance to adjacent regions (location weighted)"
label var IOOF_ADM2_Wpop_ADJ "Indian other official finance to adjacent regions (population weighted)"
label var Population_ADM2_ADJ "Population in adjacent regions"
* Merge Disbursements in adjacent regions with disbursements in specific region
merge 1:1 ID_adm2 transaction_year using `adm2', nogen 

//some regions do not receive aid, but the adjacent ones. The merge introduces missings for these regions. Code them as getting no aid.
foreach var of varlist IODA_* IOOF_* {
replace `var'=0 if `var'==. 
}
save "$data\Aid\Indian_Finance_ADM2_adjacent.dta", replace 


* ADM1 LEVEL
* Need to do this on a yearly basis to prevent odd merges
use adm1.dta, clear
levelsof transaction_year, local(T)
foreach t in `T' {
use `adm1', clear
keep if transaction_year==`t'
* Mege with adjacent region
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(1 3)
* Merge with data on population 
merge m:1 ID_adm1 transaction_year using `ADM1POP', nogen keep(3 1)
* Collapse to get sum of WB aid (projects) in adjacent regions
collapse (sum) IODA_ADM1_LOC IOOF_ADM1_LOC  IODA_ADM1_Wpop IOOF_ADM1_Wpop isum_pop_ADM1, by(src_ID_admC7 transaction_year)
* Rename variables to indicate that they are in an adjacent region
foreach v in IODA_ADM1_LOC IOOF_ADM1_LOC  IODA_ADM1_Wpop IOOF_ADM1_Wpop {
rename `v' `v'_ADJ
}
renvars isum_pop_ADM1 src_ID_admC7 / Population_ADM1_ADJ ID_adm1
save `i'.dta, replace
}

* Put yearly disbursements in adjacent regions together

use 2010.dta, clear

* XXXXXXXXXXX 06.12.2017 Lennart: We need to discuss if we take Indian aid into account for main analyses. If we would really do this, we should also consider using the complete conflict sample (so far we use only data up to 2012 as we focus on WB aid).
forvalues t=2011(1)2014 {
append using `i'.dta
erase `i'.dta
}
erase 2010.dta
* Label variables
label var IODA_ADM1_LOC_ADJ "Indian ODA-like flows to adjacent regions (location weighted)"
label var IODA_ADM1_Wpop_ADJ "Indian ODA-like flows to adjacent regions (population weighted)"
label var IOOF_ADM1_LOC_ADJ "Indian other official finance to adjacent regions (location weighted)"
label var IOOF_ADM1_Wpop_ADJ "Indian other official finance to adjacent regions (population weighted)"
label var Population_ADM1_ADJ "Population in adjacent regions"
* Merge Disbursements in adjacent regions with disbursements in specific region
merge 1:1 ID_adm1 transaction_year using `adm1', nogen 

//some regions do not receive aid, but the adjacent ones. The merge introduces missings for these regions. Code them as getting no aid.
foreach var of varlist IODA_* IOOF_* {
replace `var'=0 if `var'==. 
}


save "$data\Aid\Indian_Finance_ADM1_adjacent.dta", replace 
*/





***************************
***************************
* E: Create Conflict Data *
***************************
***************************
/* Objective: Create two datasets (ADM1 and ADM2) containing
- BRD
- Number of conflict incidences
*/

cd "$cwd"
*** Load ACD2EPR Data as a tempfile to create ethnic conflict indicators
import delim using "$rawdata\ACD2EPR\ACD2EPR-2014-1.csv", clear delimiter(";")
rename dyadid dyad_dset_n100

* Transform ACD2EPR into a Panel
drop if from>2013
drop if to<1995
gen transaction_year=1995
gen id=_n
xtset id transaction_year
tsappend, add(17)
bysort id: carryforward statename dyad_dset_n100 gwid sideb sideb_id group gwgroupid from to claim recruitment support, replace
drop if from>transaction_year
drop if to<transaction_year
collapse (max) claim recruitment support, by(transaction_year dyad_dset_n100) // collapse by year AND dyad for later merge with UCDP data

save "ACD2EPR.dta", replace


*** Import UCDP data
//import delim using "$data\Conflict data\UCDP GED\ucdp_loc_gadm_cleaned_20171031.csv", clear delimiter(";")
// xxxx Melvin 12.04.2018: the csv from 20180412 contains the same infomration as the csv from 20171031, except that the string errors have been corrected that would have led to errors in reading the csv
import delim using "$data\Conflict data\UCDP GED\ucdp_loc_gadm_cleaned_20180412.csv", clear delimiter(";")

* Keep all types of violence (1= State involved; 2 = Only non-state actors involved; 3 = Violence against civilians both by state and non-state actors)
keep if type_of_vi==1 | type_of_vi==2 | type_of_vi==3
* drop events, which took place in airspace or in international waters
drop if where_prec>4 // For all other data at least country should be available
tab best_est if country==""
* Lennart: We should decide, which variables we really need (e.g., priogrid_gid )
keep year where_prec active_yea adm_1 adm_2 country best_est isoc3 name_0c75 name_1c75 name_2c75 id_* dyad_dset_n100  dyad_name type_of_vi

* Renaming
rename year transaction_year
* Lennart: For 8,972  observations the iso-code from GED!= iso-code from GADM --> Mostly due to cases like Soviet Union vs Russia or Israel vs. Palestine, but need to double check. I would suggest ISO Codes from GADM
renvars isoc3 name_0c75 name_1c75 name_2c75 adm_1  adm_2 dyad_namec254  / ISO3 ADM0 ADM1 ADM2 ADM1_UCDP ADM2_UCDP dyad_name 

*create unique region ids
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0 r id_1)
egen ID_adm2 = concat(c id_0 r id_1 r id_2)

drop c r

* Drop some observations, which miss years
destring best_est* transaction_year, replace

*Merge with Indicators on ethnic conflict characteristics
merge m:1 dyad_dset_n100 transaction_year using ACD2EPR.dta, nogen keep(1 3)


*** create ethnic indicator
* Replace missings with zeroes assuming that the dataset is comprehensive @ Kai and Melvin: Does this makes sense?
*XXXXXX Melvin 03.02.2018: fill out missings as 0. @Kai, we should discuss this point?
replace claim=0 if claim==. 
replace recruitment=0 if recruitment==.
replace support=0 if support==.
/*XXXXXX Melvin 03.02.2018: Should all of the conditions be met, or does one not suffice? 
old code: replace ethnic=1 if claim>0 & recruitment>0 & support>0
Suggestion: replace ethnic=1 if claim>0 | recruitment>0 | support>0
Difference is that 44% are marked as ethnic conflict compared to 23%
*/
gen ethnic=0
replace ethnic=1 if claim>0 | recruitment>0 | support>0

*** create government indicator
gen gvmt=0
replace gvmt=1 if strpos( dyad_name , "Government")

*** create variable for BRD
gen brd_t1=best_estn100 if type_of_vi==1
gen brd_t2=best_estn100 if type_of_vi==2
gen brd_t3g=best_estn100 if type_of_vi==3 & gvmt==1
gen brd_t3ng=best_estn100 if type_of_vi==3 & gvmt==0
//Create best_est variables by type of ethnic dummy
gen brd_t1_eth=brd_t1 if ethnic==1
gen brd_t2_eth=brd_t2 if ethnic==1
gen brd_t3g_eth=brd_t3g if ethnic==1
gen brd_t3ng_eth=brd_t3ng if ethnic==1
*XXXXXX Melvin 03.02.2018: fill out missings as 0. @Kai, we should discuss this point?
* Replace missings with zeroes assuming that the dataset is comprehensive
foreach var of varlist brd_t* {
replace `var'=0 if `var'==.
}

*** create variable for conflict incidences. Here just a dummy. Later collapse this variable to count number of incidences
gen byte inci_t1= (brd_t1!=0)
gen byte inci_t2= (brd_t2!=0)
gen byte inci_t3g= (brd_t3g!=0)
gen byte inci_t3ng= (brd_t3ng!=0)
//Create best_est variables by type of ethnic dummy
gen byte inci_t1_eth= (brd_t1_eth!=0)
gen byte inci_t2_eth= (brd_t2_eth!=0)
gen byte inci_t3g_eth= (brd_t3g_eth!=0)
gen byte inci_t3ng_eth= (brd_t3ng_eth!=0)

* save dataset for collapsing on differen regions
save "brdprec1234.dta", replace

******** Prepare GADM1 conflict data ********
use "brdprec1234.dta", clear
rename id_0n100 ID_0N100

* Here we loose information on whether it was an active year of an ongoing conflict and the type of violence. This needs to be discussed in the group, whether we can miss these variables
*XXXXXX Melvin 03.02.2018: 95% are active and inactive ones still have BRD. Thus, keep observations
collapse (sum) best_estn100 brd* inci_*, by(ID_0N100 transaction_year ID_adm1 ID_adm2)


* create full dataset
	* get list of conflict countries and keep only those
	levelsof ID_0N100, local(country_list)
	merge m:1 ID_adm2 using gadm2.dta
	egen ever_conf=anymatch(ID_0N100), values(`country_list')
	keep if ever_conf==1
	* attribute first year to regions that never received aid to prepare for tsfill
	sum transaction_year
	replace transaction_year=`r(min)' if _merge==2
	// after replacing the year with minimum year we can drop _merge
	drop _merge ever_conf
	// replace missing ADM2 names if the ADM2 region is missing with ADM1 name and identifier
	replace ADM2=ADM1 if ADM2==""
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm2 transaction_year
	egen ID_adm2_num = group(ID_adm2)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm2_num transaction_year
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm2_num (transaction_year): carryforward ID_adm*, replace 
	bysort ID_adm2_num (years_reverse): carryforward ID_adm*, replace
	//Melvin H.L. Wong: 5. Fill out missings as 0. assume comprehensive data
	foreach var of varlist best_estn100 brd_t* inci_* {
		replace `var'=0 if `var'==.
	}
	drop years_reverse d_miss_ADM2
	order transaction_year ID_adm*
	sort ID_adm* transaction_year

preserve
* rename variables and save for ADM2
renvars best_estn100 brd_t1 brd_t2 brd_t3g brd_t3ng \ brd_adm2 brd_t1_adm2 brd_t2_adm2 brd_t3g_adm2 brd_t3ng_adm2
renvars brd_t1_eth brd_t2_eth brd_t3g_eth brd_t3ng_eth \ brd_t1_adm2_eth brd_t2_adm2_eth brd_t3g_adm2_eth brd_t3ng_adm2_eth
renvars inci_t1 inci_t2 inci_t3g inci_t3ng \ inci_t1_adm2 inci_t2_adm2 inci_t3g_adm2 inci_t3ng_adm2
renvars inci_t1_eth inci_t2_eth inci_t3g_eth inci_t3ng_eth \ inci_t1_adm2_eth inci_t2_adm2_eth inci_t3g_adm2_eth inci_t3ng_adm2_eth

label var brd_adm2 "Battle-related deaths for ADM2"
label var brd_t1_adm2 "Battle-related deaths for ADM2 - statebased"
label var brd_t2_adm2 "Battle-related deaths for ADM2 - nonstate"
label var brd_t3g_adm2 "Battle-related deaths for ADM2 - onesided government"
label var brd_t3ng_adm2 "Battle-related deaths for ADM2 - onesided nongovernment"
label var brd_t1_adm2_eth "Battle-related deaths for ADM2 - statebased; ethnic"
label var brd_t2_adm2_eth "Battle-related deaths for ADM2 - nonstate; ethnic"
label var brd_t3g_adm2_eth "Battle-related deaths for ADM2 - onesided government; ethnic"
label var brd_t3ng_adm2_eth "Battle-related deaths for ADM2 - onesided nongovernment; ethnic"
label var inci_t1_adm2 "Number of conflict incidences for ADM2 - statebased"
label var inci_t2_adm2 "Number of conflict incidences for ADM2 - nonstate"
label var inci_t3g_adm2 "Number of conflict incidences for ADM2 - onesided government"
label var inci_t3ng_adm2 "Number of conflict incidences for ADM2 - onesided nongovernment"
label var inci_t1_adm2_eth "Number of conflict incidences for ADM2 - statebased; ethnic"
label var inci_t2_adm2_eth "Number of conflict incidences for ADM2 - nonstate; ethnic"
label var inci_t3g_adm2_eth "Number of conflict incidences for ADM2 - onesided government; ethnic"
label var inci_t3ng_adm2_eth"Number of conflict incidences for ADM2 - onesided nongovernment; ethnic"

save "UCDP_GED_ADM2_tsfill(Ethnic vs Non-Ethnic).dta", replace

restore

collapse (sum) best_estn100 brd* inci_*, by(transaction_year ID_adm1) 
* rename variables and save for ADM2
renvars best_estn100 brd_t1 brd_t2 brd_t3g brd_t3ng \ brd_adm1 brd_t1_adm1 brd_t2_adm1 brd_t3g_adm1 brd_t3ng_adm1
renvars brd_t1_eth brd_t2_eth brd_t3g_eth brd_t3ng_eth \ brd_t1_adm1_eth brd_t2_adm1_eth brd_t3g_adm1_eth brd_t3ng_adm1_eth
renvars inci_t1 inci_t2 inci_t3g inci_t3ng \ inci_t1_adm1 inci_t2_adm1 inci_t3g_adm1 inci_t3ng_adm1
renvars inci_t1_eth inci_t2_eth inci_t3g_eth inci_t3ng_eth \ inci_t1_adm1_eth inci_t2_adm1_eth inci_t3g_adm1_eth inci_t3ng_adm1_eth

label var brd_adm1 "Battle-related deaths for ADM1"
label var brd_t1_adm1 "Battle-related deaths for ADM1 - statebased"
label var brd_t2_adm1 "Battle-related deaths for ADM1 - nonstate"
label var brd_t3g_adm1 "Battle-related deaths for ADM1 - onesided government"
label var brd_t3ng_adm1 "Battle-related deaths for ADM1 - onesided nongovernment"
label var brd_t1_adm1_eth "Battle-related deaths for ADM1 - statebased; ethnic"
label var brd_t2_adm1_eth "Battle-related deaths for ADM1 - nonstate; ethnic"
label var brd_t3g_adm1_eth "Battle-related deaths for ADM1 - onesided government; ethnic"
label var brd_t3ng_adm1_eth "Battle-related deaths for ADM1 - onesided nongovernment; ethnic"
label var inci_t1_adm1 "Number of conflict incidences for ADM1 - statebased"
label var inci_t2_adm1 "Number of conflict incidences for ADM1 - nonstate"
label var inci_t3g_adm1 "Number of conflict incidences for ADM1 - onesided government"
label var inci_t3ng_adm1 "Number of conflict incidences for ADM1 - onesided nongovernment"
label var inci_t1_adm1_eth "Number of conflict incidences for ADM1 - statebased; ethnic"
label var inci_t2_adm1_eth "Number of conflict incidences for ADM1 - nonstate; ethnic"
label var inci_t3g_adm1_eth "Number of conflict incidences for ADM1 - onesided government; ethnic"
label var inci_t3ng_adm1_eth"Number of conflict incidences for ADM1 - onesided nongovernment; ethnic"

save "UCDP_GED_ADM1_tsfill(Ethnic vs Non-Ethnic).dta", replace
















*XXXXXX Melvin 05.02.2018: Replaced entire code below
/*
*** Load ACD2EPR Data as a tempfile to create ethnic conflict indicators
import delim using "$rawdata\ACD2EPR\ACD2EPR-2014-1.csv", clear delimiter(";")
rename dyadid dyad_dset_n100

* Transform ACD2EPR into a Panel
drop if from>2013
drop if to<1995
gen transaction_year=1995
gen id=_n
xtset id transaction_year
tsappend, add(17)
bysort id: carryforward statename dyad_dset_n100 gwid sideb sideb_id group gwgroupid from to claim recruitment support, replace
drop if from>transaction_year
drop if to<transaction_year
collapse (max) claim recruitment support, by(transaction_year dyad_dset_n100) // collapse by year AND dyad for later merge with UCDP data

save "ACD2EPR.dta", replace

*** Import UCDP data
import delim using "$data\Conflict data\UCDP GED\ucdp_loc_gadm_cleaned_20171031.csv", clear delimiter(";")
* Keep all types of violence (1= State involved; 2 = Only non-state actors involved; 3 = Violence against civilians both by state and non-state actors)
keep if type_of_vi==1 | type_of_vi==2 | type_of_vi==3
* drop events, which took place in airspace or in international waters
drop if where_prec>4 // For all other data at least country should be available
tab best_est if country==""
* Lennart: We should decide, which variables we really need (e.g., priogrid_gid )
keep year where_prec active_yea adm_1 adm_2 country best_est isoc3 name_0c75 name_1c75 name_2c75 id_* dyad_dset_n100  dyad_name type_of_vi


*Create best_est variables by type of violence
foreach var in best_est_t1 best_est_t2 best_est_t3g best_est_t3ng{
gen `var'=0
}
gen gvmt=0
replace gvmt=1 if strpos( dyad_name , "Government")
replace best_est_t1=best_estn100 if type_of_vi==1
replace best_est_t2=best_estn100 if type_of_vi==2
replace best_est_t3g=best_estn100 if type_of_vi==3 & gvmt==1
replace best_est_t3ng=best_estn100 if type_of_vi==3 & gvmt==0


* Renaming
rename year transaction_year
* Lennart: For 8,972  observations the iso-code from GED!= iso-code from GADM --> Mostly due to cases like Soviet Union vs Russia or Israel vs. Palestine, but need to double check. I would suggest ISO Codes from GADM
renvars isoc3 name_0c75 name_1c75 name_2c75 adm_1  adm_2 dyad_namec254  / ISO3 ADM0 ADM1 ADM2 ADM1_UCDP ADM2_UCDP dyad_name 


*create unique region ids
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0 r id_1)
egen ID_adm2 = concat(c id_0 r id_1 r id_2)

drop c r

* Drop some observations, which miss years
destring best_est* transaction_year, replace

*Merge with Indicators on ethnic conflict characteristics
merge m:1 dyad_dset_n100 transaction_year using ACD2EPR.dta, nogen keep(1 3)
* Replace missings with zeroes assuming that the dataset is comprehensive @ Kai and Melvin: Does this makes sense?
replace claim=0 if claim==. 
replace recruitment=0 if recruitment==.
replace support=0 if support==.
gen ethnic=0
replace ethnic=1 if claim>0 & recruitment>0 & support>0
save "brdprec1234.dta", replace

keep if where_prec==4

save "brdprec4.dta", replace

* Prepare location weighted data with precision code 4 (ADM2 information), i.e. allocate BRD fpr ADM1 equally to ADM2 regions
use brdprec4.dta, replace
gen count=1
bysort ID_adm1 transaction_year: egen incidence_adm14=total(count)

collapse (sum) best_est*  (mean) incidence_adm14, by(transaction_year ISO3 ADM0 ADM1 ID_adm1 ethnic)

renvars best_estn100 best_est_t1 best_est_t2 best_est_t3g best_est_t3ng / best_est_adm14 best_est_t1_adm14 best_est_t2_adm14 best_est_t3g_adm14 best_est_t3ng_adm14 

joinby ID_adm1 using gadm2.dta, update
//create missing id variables after joining datasets
gen c = "c"
gen r = "r"
drop ID_adm2
egen ID_adm2 = concat(c ID_0N100 r ID_1N100 r ID_2N100)
drop c r

gen count=1
bysort ID_adm1 transaction_year: egen count1=total(count)
replace count1=0 if count1==.
foreach g in best_est_adm14 best_est_t1_adm14 best_est_t2_adm14 best_est_t3g_adm14 best_est_t3ng_adm14  incidence_adm14{
replace `g'=`g'/count1
replace `g'=round(`g')
}

renvars best_est_adm14 best_est_t1_adm14 best_est_t2_adm14 best_est_t3g_adm14 best_est_t3ng_adm14   incidence_adm14 / best_est_adm24 best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 ///
best_est_t3ng_adm24 incidence_adm24 
keep best_est_adm24  best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 best_est_t3ng_adm24  incidence_adm24 transaction_year ISO3 ADM0 ADM1 ID_adm1 ID_adm2 ethnic

save Conflict_ADM2_prec4.dta, replace 

* Prepare population weighted data with precision code 4 (ADM2 information)
use brdprec4.dta, replace
gen count=1
bysort ID_adm1 transaction_year: egen incidence_adm14=total(count)
collapse (sum) best_est* (mean) incidence_adm14, by(transaction_year ISO3 ADM0 ADM1 ID_adm1 ethnic)

renvars best_estn100 best_est_t1 best_est_t2 best_est_t3g best_est_t3ng / best_est_adm14 best_est_t1_adm14 best_est_t2_adm14 best_est_t3g_adm14 best_est_t3ng_adm14 

joinby ID_adm1 using gadm2.dta, update
//create missing id variables after joining datasets
gen c = "c"
gen r = "r"
drop ID_adm2
egen ID_adm2 = concat(c ID_0N100 r ID_1N100 r ID_2N100)
drop c r

* Need to assume once again that some ADM1 regions are ADM2 regions as they are missing in our data
	replace id_2=0 if id_1!=. & id_2==. //save one observation where there is actually one obs with project side for adm1 region    @Melvin: Keine Änderungen werden angezeigt??? //MW: Possible explanation; Lennart changed disbursement.dta. Previously only projects with code "C" instead of "D" where included.
	drop if id_2==. //there are a lot of them without data on location  KG: @Melvin: A lot? Stata says 63? Komisch dass ich in dem TempFile die ID_2 Variable nicht sehe? Oder wird das nicht angezeigt? Ich sehe nur ID_adm2 ID_2N100
	  
	//create dummy variable indicating if a GADM2 region is missing, thus have been replaced by GADM1 region
	gen byte d_miss_ADM2=(id_2==0 & id_1!=0)

* Merge with Population data
renvars transaction_year ID_adm2 / year rid2
merge m:1 rid2 year using "$data\ADM\1_1_1_R_pop_GADM2.dta", nogen
renvars year rid2 /  transaction_year ID_adm2

* Create Incidence Count
bysort ID_adm1 ethnic transaction_year: egen pop_adm1=sum(isum_pop)
gen incidence_adm24=(incidence_adm14*isum_pop)/pop_adm1
replace incidence_adm24=round(incidence_adm24,1)
* Weight Aid
foreach var in best_est_adm  best_est_t1_adm best_est_t2_adm best_est_t3g_adm best_est_t3ng_adm {
gen `var'24=(`var'14*isum_pop)/pop_adm1
replace `var'24=round(`var'24,1)
}
keep best_est_adm24 best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 best_est_t3ng_adm24 incidence_adm24 transaction_year ISO3 ADM0 ADM1 ID_adm1 ID_adm2 ethnic

save Conflict_Wpop_ADM2_prec4.dta, replace 

use brdprec1234.dta, clear
keep if where_prec==1 | where_prec==2 | where_prec==3

********************************************************************************
//Generate yearly regional totals
********************************************************************************
* Prepare a count variable for yearly conflict incidences
gen count=1
bysort ID_adm2 transaction_year ethnic: egen incidence_adm2=total(count)


* Here we loose information on whether it was an active year of an ongoing conflict and the type of violence. This needs to be discussed in the group, whether we can miss these variables
* Some regions miss ADM2 codes. Here we should check if these are similar countries like in the Aid-GADM merge (e.g., Cape Verde, Macedonia, Armenia...)
collapse (sum) best_est* (mean) incidence_adm2, by(ADM0 ADM1 ADM2  ISO3 transaction_year ID_adm1 ID_adm2 ethnic)
renvars best_estn100   best_est_t1 best_est_t2 best_est_t3g best_est_t3ng \ best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2
merge 1:1 ID_adm2 transaction_year ethnic using Conflict_ADM2_prec4.dta, nogen keep(1 3)
* Replace zeroes
foreach var in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2 best_est_adm24 best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 ///
best_est_t3ng_adm24 incidence_adm24 {
replace `var'=0 if `var'==.
}
* Add up BRDs
foreach v in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2{
replace `v' = `v'+ `v'4
}

drop  best_est_adm24 best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 best_est_t3ng_adm24 incidence_adm24 
reshape wide best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2 , i(ADM0 ADM1 ADM2  ISO3 transaction_year ID_adm1 ID_adm2) j(ethnic)
renvars  best_est_adm20 best_est_t1_adm20 best_est_t2_adm20 best_est_t3g_adm20 best_est_t3ng_adm20 incidence_adm20 ///
  \ best_est_adm2_nonethnic best_est_t1_adm2_nonethnic best_est_t2_adm2_nonethnic best_est_t3g_adm2_nonethnic best_est_t3ng_adm2_nonethnic incidence_adm2_nonethnic
renvars  best_est_adm21 best_est_t1_adm21 best_est_t2_adm21 best_est_t3g_adm21 best_est_t3ng_adm21 incidence_adm21 ///
  \ best_est_adm2_ethnic best_est_t1_adm2_ethnic best_est_t2_adm2_ethnic best_est_t3g_adm2_ethnic best_est_t3ng_adm2_ethnic incidence_adm2_ethnic


* Fill Up missings as we assume that our dataset is comprehensive
foreach e in ethnic nonethnic{
foreach v in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2{
replace `v'_`e'=0 if best_est_adm2_`e'==.  
}
}



sum best* 
duplicates tag ISO3 ADM1 transaction_year, gen(tag)
drop if tag>1 & ADM2==""
drop tag


	//drop if ID_2n100==0 
	replace ID_adm2="ADM2 Missing" if ID_adm1!="" & ID_adm2=="c0r0r0" //save one observation where there is actually one obs with project side for adm1 region  
	drop if ID_adm2=="c0r0r0" 
	drop if ID_adm1=="c0r0"
	
	//create dummy variable indicating if a GADM2 region is missing, thus have been replaced by GADM1 region
	gen byte d_miss_ADM2=(ID_adm2=="ADM2 Missing" & ID_adm1!="")

destring transaction_year, replace
	****create balanced dataset without gaps (assumption perfect data on conflict occurence, that is, if there is no data, then it is not missing but no conflict at all, = 0) 
	//ADM2 level
	sort ID_adm1 transaction_year
	egen ID_adm2_num = group(ID_adm2)
	//2. tsset Geounit Jahr
	tsset ID_adm2_num transaction_year
	// 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	// 4. carryforward, countryname etc
	bysort ID_adm2_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* d_miss_ADM2, replace 
	bysort ID_adm2_num (years_reverse): carryforward ID_adm*  ADM* ISO3 ID_* d_miss_ADM2, replace
	// 5. replace Conflictvar= 0 if Conflictvar==.
foreach e in ethnic nonethnic{
foreach v in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2{
replace `v'_`e'=0 if `v'_`e'==.  
}
}
	drop years_reverse
	order transaction_year ID_adm*
	sort ID_adm* transaction_year
	* Calculate ADM1 level battle related deaths
	foreach e in ethnic nonethnic{
	foreach v in best_est_adm best_est_t1_adm best_est_t2_adm best_est_t3g_adm best_est_t3ng_adm incidence_adm{

bysort ID_adm1 transaction_year: egen `v'1_`e'=total(`v'2_`e')
}
}
* Generate Totals of ethnic and non-ethnic conflicts
foreach t in best_est_adm best_est_t1_adm best_est_t2_adm best_est_t3g_adm best_est_t3ng_adm incidence_adm {
foreach v in 1 2{
gen `t'`v'=`t'`v'_ethnic+`t'`v'_nonethnic
label var `t'`v' "Overall casulaties/incidences per ADM`v' region per year"
}
}


	save "$data\Conflict Data\UCDP_GED_ADM2_tsfill(Ethnic vs Non-Ethnic).dta", replace

	
use `brdprec1234', clear
keep if where_prec==1 | where_prec==2 | where_prec==3

********************************************************************************
//Generate yearly regional totals
********************************************************************************
* Prepare a count variable for yearly conflict incidences
gen count=1
bysort ID_adm2 transaction_year ethnic: egen incidence_adm2=total(count)


* Here we loose information on whether it was an active year of an ongoing conflict and the type of violence. This needs to be discussed in the group, whether we can miss these variables
* Some regions miss ADM2 codes. Here we should check if these are similar countries like in the Aid-GADM merge (e.g., Cape Verde, Macedonia, Armenia...)
collapse (sum) best_est* (mean) incidence_adm2, by(ADM0 ADM1 ADM2  ISO3 transaction_year ID_adm1 ID_adm2 ethnic)
renvars best_estn100   best_est_t1 best_est_t2 best_est_t3g best_est_t3ng \ best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2
merge 1:1 ID_adm2 transaction_year ethnic using `Conflict_Wpop_ADM2_prec4', nogen keep(1 3)
* Replace zeroes
foreach var in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2 best_est_adm24 best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 ///
best_est_t3ng_adm24 incidence_adm24 {
replace `var'=0 if `var'==.
}
* Add up BRDs
foreach v in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2{
replace `v' = `v'+ `v'4
}

drop  best_est_adm24 best_est_t1_adm24 best_est_t2_adm24 best_est_t3g_adm24 best_est_t3ng_adm24 incidence_adm24 
reshape wide best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2 , i(ADM0 ADM1 ADM2  ISO3 transaction_year ID_adm1 ID_adm2) j(ethnic)
renvars  best_est_adm20 best_est_t1_adm20 best_est_t2_adm20 best_est_t3g_adm20 best_est_t3ng_adm20 incidence_adm20 ///
  \ best_est_adm2_nonethnic best_est_t1_adm2_nonethnic best_est_t2_adm2_nonethnic best_est_t3g_adm2_nonethnic best_est_t3ng_adm2_nonethnic incidence_adm2_nonethnic
renvars  best_est_adm21 best_est_t1_adm21 best_est_t2_adm21 best_est_t3g_adm21 best_est_t3ng_adm21 incidence_adm21 ///
  \ best_est_adm2_ethnic best_est_t1_adm2_ethnic best_est_t2_adm2_ethnic best_est_t3g_adm2_ethnic best_est_t3ng_adm2_ethnic incidence_adm2_ethnic


* Fill Up missings as we assume that our dataset is comprehensive
foreach e in ethnic nonethnic{
foreach v in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2{
replace `v'_`e'=0 if best_est_adm2_`e'==.  
}
}


sum best* 
duplicates tag ISO3 ADM1 transaction_year, gen(tag)
drop if tag>1 & ADM2==""
drop tag


	//drop if ID_2n100==0 
	replace ID_adm2="ADM2 Missing" if ID_adm1!="" & ID_adm2=="c0r0r0" //save one observation where there is actually one obs with project side for adm1 region  
	drop if ID_adm2=="c0r0r0" 
	drop if ID_adm1=="c0r0"
	
	//create dummy variable indicating if a GADM2 region is missing, thus have been replaced by GADM1 region
	gen byte d_miss_ADM2=(ID_adm2=="ADM2 Missing" & ID_adm1!="")

destring transaction_year, replace
	****create balanced dataset without gaps (assumption perfect data on conflict occurence, that is, if there is no data, then it is not missing but no conflict at all, = 0) 
	//ADM2 level
	sort ID_adm1 transaction_year
	egen ID_adm2_num = group(ID_adm2)
	//2. tsset Geounit Jahr
	tsset ID_adm2_num transaction_year
	// 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	// 4. carryforward, countryname etc
	bysort ID_adm2_num (transaction_year): carryforward ID_adm* ADM* ISO3 ID_* d_miss_ADM2, replace 
	bysort ID_adm2_num (years_reverse): carryforward ID_adm*  ADM* ISO3 ID_* d_miss_ADM2, replace
	// 5. replace Conflictvar= 0 if Conflictvar==.
foreach e in ethnic nonethnic{
foreach v in best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2{
replace `v'_`e'=0 if `v'_`e'==.  
}
}
	drop years_reverse
	order transaction_year ID_adm*
	sort ID_adm* transaction_year
	* Calculate ADM1 level battle related deaths
	foreach e in ethnic nonethnic{
	foreach v in best_est_adm best_est_t1_adm best_est_t2_adm best_est_t3g_adm best_est_t3ng_adm incidence_adm{

bysort ID_adm1 transaction_year: egen `v'1_`e'=total(`v'2_`e')
}
}
* Generate Totals of ethnic and non-ethnic conflicts
foreach t in best_est_adm best_est_t1_adm best_est_t2_adm best_est_t3g_adm best_est_t3ng_adm incidence_adm {
foreach v in 1 2{
gen `t'`v'=`t'`v'_ethnic+`t'`v'_nonethnic
label var `t'`v' "Overall casulaties/incidences per ADM`v' region per year"
}
}


	save "$data\Conflict Data\UCDP_GED_ADM2_Wpop_tsfill(Ethnic vs Non-Ethnic).dta", replace
	
	


***************************
* Generate Conflict in adjacent regions 

** ADM2
* Load adjacency matrix for ADM2 regions
import excel using "$data\ADM\adm2_neighbors.xls", firstrow clear  //This dataset was created via adjacent_adm_classification.py. We use this adjacency matrix to match each ADM region with the battle related deaths in adjacent ADM regions.
* Drop Adjacent Regions in other country
drop if src_Name_0C75!= nbr_NAME_0C75
rename nbr_ID_ADMC12 ID_adm2
save "$data\ADM\adm2_neighbors.dta", replace

* Merge Adjacency matrix with conflict in adjacent regions
forvalues i=1995(1)2012 {
use "$data\Conflict Data\UCDP_GED_ADM2_tsfill(Ethnic vs Non-Ethnic).dta", clear // The battle related deaths are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the conflict casualties in adjacent regions.
drop if transaction_year!=`i'
merge 1:m ID_adm2 using "$data\ADM\adm2_neighbors.dta", nogen keep(3 1)
* Collapse to get sum of battle related deaths in adjacent regions
collapse (sum) best_est*adm2* incidence*adm2* , by(src_ID_admC12  transaction_year)
rename src_ID_admC12 ID_adm2
duplicates report ID_adm2
* Rename sums for adjacent regions
renvars best_est_adm2 best_est_t1_adm2 best_est_t2_adm2 best_est_t3g_adm2 best_est_t3ng_adm2 incidence_adm2  /// 
/ best_est_adm2_adj best_est_t1_adm2_adj best_est_t2_adm2_adj best_est_t3g_adm2_adj best_est_t3ng_adm2_adj incidence_adm2_adj
foreach e in ethnic nonethnic{
renvars best_est_adm2_`e' best_est_t1_adm2_`e' best_est_t2_adm2_`e' best_est_t3g_adm2_`e' best_est_t3ng_adm2_`e' incidence_adm2_`e'  /// 
/ best_est_adm2_adj_`e' best_est_t1_adm2_adj_`e' best_est_t2_adm2_adj_`e' best_est_t3g_adm2_adj_`e' best_est_t3ng_adm2_adj_`e' incidence_adm2_adj_`e'
}
keep ID_adm2 best_est*_adm2_adj* transaction_year  incidence_adm2_adj*
tempfile `i'
save `i', replace 
}
* Put yearly battle related deaths together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'

}

* Merging the battle related deaths in adjacent regions with the initial file on battle related deaths in regions itself
merge 1:1 ID_adm2 transaction_year using "$data\Conflict Data\UCDP_GED_ADM2_tsfill(Ethnic vs Non-Ethnic).dta", nogen keep(2 3)  


foreach v in best_est_adm2_adj best_est_t1_adm2_adj best_est_t2_adm2_adj best_est_t3g_adm2_adj best_est_t3ng_adm2_adj incidence_adm2_adj{
label var `v' "Battle related death in all adjacent administrative regions (best estimate)"
replace `v'=0 if `v'==.
foreach e in ethnic nonethnic{
label var `v'_`e' "Battle related death in all adjacent administrative regions (best estimate) from `e' war"
* Replace missings (Given the assumption that our dataset is comprehensive)
replace `v'_`e'=0 if `v'_`e'==.
}
}
tempfile ADM2adj
save `ADM2adj', replace 


** ADM1
* Load adjacency matrix for ADM1 regions
import excel using "$data\ADM\adm1_neighbors.xls", firstrow clear
renvars src_ID_0N100 src_ID_1N100  nbr_ID_0N100 nbr_ID_1N100 / src_id_0 src_id_1 nbr_id_0 nbr_id_1
*create unique region ids
gen c = "c"
gen r = "r"
egen src_ID_admC7 = concat(c src_id_0 r src_id_1)
egen nbr_ID_admC7 = concat(c nbr_id_0 r nbr_id_1)
drop c r
rename nbr_ID_admC7 ID_adm1
* Drop Adjacent Regions in other country
drop if src_Name_0C75!= nbr_NAME_0C75


save "$data\ADM\adm1_neighbors.dta", replace

* Merge Adjacency matrix with conflict in adjacent regions
forvalues i=1995(1)2012 {
use "$data\Conflict Data\UCDP_GED_ADM2_tsfill(Ethnic vs Non-Ethnic).dta", clear // The battle related deaths are matched in this step with the adjacent regions. Afterwards we collapse to receive the sum of the conflict casualties in adjacent regions.
drop if transaction_year!=`i'
collapse (mean) best_est*_adm1* incidence_adm1* , by(ID_adm1 transaction_year)
merge 1:m ID_adm1 using "$data\ADM\adm1_neighbors.dta", nogen keep(3 1)
* Collapse to get sum of battle related deaths in adjacent regions
collapse (sum) best_est*_adm1* incidence_adm1* , by(src_ID_admC7 transaction_year)
rename src_ID_admC7  ID_adm1
duplicates report ID_adm1
* Rename Sums to indicate that they are based on adjacent regions
renvars best_est_adm1 best_est_t1_adm1 best_est_t2_adm1 best_est_t3g_adm1 best_est_t3ng_adm1 incidence_adm1  /// 
/ best_est_adm1_adj best_est_t1_adm1_adj best_est_t2_adm1_adj best_est_t3g_adm1_adj best_est_t3ng_adm1_adj incidence_adm1_adj
foreach e in ethnic nonethnic{
renvars best_est_adm1_`e' best_est_t1_adm1_`e' best_est_t2_adm1_`e' best_est_t3g_adm1_`e' best_est_t3ng_adm1_`e' incidence_adm1_`e'  /// 
/ best_est_adm1_adj_`e' best_est_t1_adm1_adj_`e' best_est_t2_adm1_adj_`e' best_est_t3g_adm1_adj_`e' best_est_t3ng_adm1_adj_`e' incidence_adm1_adj_`e'
}

keep ID_adm1 best_est*_adm1_adj*  incidence_adm1_adj* transaction_year
tempfile `i'
save `i', replace 
}
* Put yearly battle related deaths together
clear
use 1995
forvalues i=1996(1)2012 {
append using `i'

}

* Merging the battle related deaths in adjacent regions with the initial file on battle related deaths in regions itself
merge 1:m ID_adm1 transaction_year using `ADM2adj', nogen keep(2 3)  
* Provide Labels and Replace Missings
foreach v in best_est_adm1_adj best_est_t1_adm1_adj best_est_t2_adm1_adj best_est_t3g_adm1_adj best_est_t3ng_adm1_adj incidence_adm1_adj{
label var `v' "Battle related death in all adjacent administrative regions (best estimate)"
replace `v'=0 if `v'==.
foreach e in ethnic nonethnic{
label var `v'_`e' "Battle related death in all adjacent administrative regions (best estimate) from `e' war"
* Replace missings (Given the assumption that our dataset is comprehensive)
replace `v'_`e'=0 if `v'_`e'==.
}
}
save "$data\Conflict Data\UCDP_GED_ADM2_tsfill_adjacent(Ethnic vs Non-Ethnic).dta", replace
*/


**********************
**********************
* F: Create Controls *
**********************
**********************
*XXXXXX Melvin 05.02.2018: @Kai, do you know what the OBJECTID is? Where is the respective shapefile?
/*
* Prepare Night Light Data on the ADM1 level
cd "$rawdata\Control variables soil and nightlight and elevation\processed_light\adm1"
foreach i in F121995 F121996 F121997 F121998 F121999 F141997 F141998 F141999 F142000 F142001 F142002 F142003 F152000 F152001 F152002 F152003 F152004 F152005 F152006 F152007 F162004 F162005 F162006 F162007 F162008 F162009 F182010 F182011 F182012 F182013 {
import excel using "`i'.xls", firstrow clear
gen transaction_year=substr("`i'",4,7)
destring transaction_year, replace
tempfile `i'
save `i', replace 
}

foreach i in F121995 F121996 F121997 F121998 F121999 F141997 F141998 F141999 F142000 F142001 F142002 F142003 F152000 F152001 F152002 F152003 F152004 F152005 F152006 F152007 F162004 F162005 F162006 F162007 F162008 F162009 F182010 F182011 F182012 {
append using `i'
}
collapse (mean) MEAN AREA, by (OBJECTID transaction_year)
drop if transaction_year>2012 | transaction_year<1995
tempfile ADM1NLIGHTS
save "ADM1NLIGHTS.dta", replace



* Prepare Night Light Data on the ADM2 level
cd "$rawdata\Control variables soil and nightlight and elevation\processed_light\adm2"
foreach i in F121995 F121996 F121997 F121998 F121999 F141997 F141998 F141999 F142000 F142001 F142002 F142003 F152000 F152001 F152002 F152003 F152004 F152005 F152006 F152007 F162004 F162005 F162006 F162007 F162008 F162009 F182010 F182011 F182012 F182013 {
import excel using "`i'.xls", firstrow clear
gen transaction_year=substr("`i'",4,7)
destring transaction_year, replace
tempfile `i'
save `i', replace 
}

foreach i in F121995 F121996 F121997 F121998 F121999 F141997 F141998 F141999 F142000 F142001 F142002 F142003 F152000 F152001 F152002 F152003 F152004 F152005 F152006 F152007 F162004 F162005 F162006 F162007 F162008 F162009 F182010 F182011 F182012 {
append using `i'
}
collapse (mean) MEAN AREA, by (OBJECTID transaction_year)
drop if transaction_year>2012 | transaction_year<1995
tempfile ADM2NLIGHTS
save "ADM2NLIGHTS.dta", replace

*/

cd "$cwd"

*** Get indicator for missing ADM2 ***
use gadm2.dta, clear
duplicates drop ID_adm1, force
keep ID_adm1 d_miss_ADM2
save "temp_miss_ADM2_idADM1.dta", replace

use gadm2.dta, clear
duplicates drop ID_adm2, force
keep ID_adm1 ID_adm2 d_miss_ADM2
save "temp_miss_ADM2_idADM2.dta", replace

use gadm2.dta, clear
duplicates drop ID_adm2, force
keep ID_adm1 ID_adm2 d_miss_ADM2
keep if d_miss_ADM2==1
save "temp_miss_ADM2_missonly.dta", replace

****ADM1****
import excel "$data\adm1.xls", sheet("adm1") firstrow clear
//create unique region ids
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c ID_0 r ID_1)
merge m:1 ID_adm1 using "temp_miss_ADM2_idADM1.dta"
drop if _merge==2
//duplicates drop ID_adm1, force // necessary drop due to errors in GADM shapefiles. No loss of data
drop _merge c r FID HASC_1 CCN_1 CCA_1 TYPE_1 ENGTYPE_1 NL_NAME_1 VARNAME_1 Shape_Leng Shape_Area
save "adm1_cleaned.dta", replace

****ADM2****
import excel "$data\adm2.xls", sheet("adm2") firstrow clear
sort OBJECTID
//create unique region ids
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c ID_0 r ID_1)
egen ID_adm2 = concat(c ID_0 r ID_1 r ID_2)
merge m:1 ID_adm2 using "temp_miss_ADM2_idADM2.dta", nogen
//assing unique OBJECTID for missings
sum OBJECTID
local start=(`r(max)'+1)
capture drop count
gen count=1
sum count
local end=`r(N)'
forvalue i= `start'/`end' {
	replace OBJECTID=`i' if OBJECTID==. in `i'
}
//drop variables
drop c r FID HASC_2 CCN_2 CCA_2 TYPE_2 ENGTYPE_2 NL_NAME_2 VARNAME_2 Shape_Leng Shape_Area count
save "adm2_cleaned.dta", replace

***Elevation ADM1***
import excel "$data\processed_elevation\adm1elevation.xls", sheet("adm1elevation") firstrow clear
keep OBJECTID STD MIN MAX MEAN //OBJECTID Seems to be at the right variable to later merge the data back to the original adm1 and adm2 files
merge 1:1 OBJECTID using "adm1_cleaned.dta", nogen
// collapse on ADM level to correct for errors in shapefile
collapse (min) MIN (max) MAX d_miss_ADM2 (mean) STD MEAN, by (ID_adm1)
replace STD =0 if STD==.
replace MIN =0 if MIN==.
replace MAX =0 if MAX==.
replace MEAN=0 if MEAN==.

// give label names
label var  MIN "Minimal elevation in 30 arc seconds, zonal statistic arc gis"
label var  MAX "Maximal elevation in 30 arc second, zonal statistic arc gis"
label  var STD "Standard Deviation of elevation in 30 arc seconds, zonal statistic arc gis"
label  var MEAN "Mean elevation in 30 arc seconds, zonal statistic arc gis"

//rename
rename MEAN elevation_mean
rename MAX elevation_max
rename MIN elevation_min
rename STD elevation_std


save "adm1elevation_cleaned.dta", replace

***Elevation ADM2***
import excel "$data\processed_elevation\adm2elevation.xls", sheet("adm2elevation") firstrow clear
keep OBJECTID STD MIN MAX MEAN
merge 1:1 OBJECTID using "adm2_cleaned.dta", nogen
collapse (min) MIN (max) MAX d_miss_ADM2 (mean) STD MEAN, by (ID_adm1 ID_adm2)
replace STD =0 if STD==.
replace MIN =0 if MIN==.
replace MAX =0 if MAX==.
replace MEAN=0 if MEAN==.
// give label names
label var  MIN "Minimal elevation in 30 arc seconds, zonal statistic arc gis"
label var  MAX "Maximal elevation in 30 arc second, zonal statistic arc gis"
label  var STD "Standard Deviation of elevation in 30 arc seconds, zonal statistic arc gis"
label  var MEAN "Mean elevation in 30 arc seconds, zonal statistic arc gis"
//rename
rename MEAN elevation_mean
rename MAX elevation_max
rename MIN elevation_min
rename STD elevation_std
// save observations with complete information for appending later
preserve
keep if d_miss_ADM2==0
save "temp_nomiss.dta", replace
restore
keep if d_miss_ADM2==1
drop elevation_* //drop to avoid merging error
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge m:1 ID_adm1 using "adm1elevation_cleaned.dta", nogen keep(3)
keep if d_miss_ADM2==1
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
save "adm2elevation_cleaned.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"

*****Lakes and Rivers ADM1****
import excel "$data\processed_lakes_rivers\adm1lakes_rivers.xls", sheet("adm1lakes_rivers") firstrow clear
keep OBJECTID featurecla NAME_1 //NAME_1 as merge control for ADM1
ren NAME_1 NAME_1_lakesrivers
gen Driver=0
replace Driver=1 if featurecla=="River"
gen Dlake=0
replace Dlake=1 if featurecla=="Lake Centerline"
bys OBJECTID: egen No_river=total(Driver)
collapse(mean) Dlake Driver, by(OBJECTID NAME_1_lakesrivers) //collaspe to get unique OBJECTID
//attach information on regions
merge 1:1 OBJECTID using "adm1_cleaned.dta", nogen
replace Driver=0 if Driver==.
replace Dlake=0 if Dlake==.
//collapse by region
collapse (max) Dlake Driver d_miss_ADM2, by (ID_adm1)
//prepare data
replace Driver=1 if Driver!=0
replace Dlake=1 if Dlake!=0
label var  Driver "1 if river in district, intersect in arcgis"
label  var Dlake "1 if lake in district, intersect in arcgis"
save "adm1lakesrivers_cleaned.dta", replace


*****Lakes and Rivers ADM2****
import excel "$data\processed_lakes_rivers\adm2lakes_rivers.xls", sheet("adm2lakes_rivers") firstrow clear
keep OBJECTID featurecla NAME_2 //Name_2 as merge control for ADM2
ren NAME_2 NAME_2_lakesrivers
gen Driver=0
replace Driver=1 if featurecla=="River"
gen Dlake=0
replace Dlake=1 if featurecla=="Lake Centerline"
bys OBJECTID: egen No_river=total(Driver)
collapse(mean) Dlake Driver, by(OBJECTID NAME_2_lakesrivers)
//attach information on regions
merge 1:1 OBJECTID using "adm2_cleaned.dta", nogen
replace Driver=0 if Driver==.
replace Dlake=0 if Dlake==.
//collapse by region
collapse (max) Dlake Driver d_miss_ADM2, by (ID_adm1 ID_adm2)
//prepare data
replace Driver=1 if Driver!=0
replace Dlake=1 if Dlake!=0
label  var Driver "1 if river in district, intersect in arcgis"
label  var Dlake "1 if lake in district, intersect in arcgis"
// save observations with complete information for appending later
preserve
keep if d_miss_ADM2==0
save "temp_nomiss.dta", replace
restore
keep if d_miss_ADM2==1
drop Driver Dlake
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge m:1 ID_adm1 using "adm1lakesrivers_cleaned.dta", nogen keep(3)
keep if d_miss_ADM2==1
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
save "adm2lakesrivers_cleaned.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"

*****Borders ADM1*****
import excel "$data\processed_borders\adm1borders.xls", sheet("adm1borders") firstrow clear
keep OBJECTID NAME_1
ren NAME_1 NAME_1_Borders
gen Dborder=1
collapse(max) Dborder, by(OBJECTID)
merge 1:1 OBJECTID using "adm1_cleaned.dta", nogen
replace Dborder=0 if Dborder==.
// collapse on ADM level to correct for errors in shapefile
collapse (max) Dborder d_miss_ADM2, by (ID_adm1)
label var Dborder "1 if district at country border, intersect in arcgis"
save "adm1borders_cleaned.dta", replace

*****Borders ADM2*****
import excel "$data\processed_borders\adm2borders.xls", sheet("adm2borders") firstrow clear
keep OBJECTID NAME_2
ren NAME_2 NAME_2_borders

gen Dborder=1
collapse(max) Dborder, by(OBJECTID)
merge 1:1 OBJECTID using "adm2_cleaned.dta", nogen
replace Dborder=0 if Dborder==.
// collapse on ADM level to correct for errors in shapefile
collapse (max) Dborder d_miss_ADM2, by(ID_adm1 ID_adm2)
label var  Dborder "1 if district at country border, intersect in arcgis"
// save observations with complete information for appending later
preserve
keep if d_miss_ADM2==0
save "temp_nomiss.dta", replace
restore
keep if d_miss_ADM2==1
drop Dborder
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge m:1 ID_adm1 using "adm1borders_cleaned.dta", nogen keep(3)
keep if d_miss_ADM2==1
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
save "adm2borders_cleaned.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"



******Oceans ADM1******
import excel "$data\processed_ocean_adjacent\adm1ocean_adjacent.xls", sheet("adm1ocean_adjacent") firstrow clear
keep OBJECTID NAME_1
ren NAME_1 NAME_1_oceans
gen Docean=1
collapse(max) Docean, by(OBJECTID)
merge 1:1 OBJECTID using "adm1_cleaned.dta", nogen
replace Docean=0 if Docean==.
// collapse on ADM level to correct for errors in shapefile
collapse (max) Docean d_miss_ADM2, by (ID_adm1)

label  var Docean "1 if district borders ocean,  intersect in arcgis"
save "adm1oceans_cleaned.dta", replace

******Oceans ADM2******
import excel "$data\processed_ocean_adjacent\adm2ocean_adjacent.xls", sheet("adm2ocean_adjacent") firstrow clear
keep OBJECTID NAME_2
ren NAME_2 NAME_2_oceans
gen Docean=1
collapse(max) Docean, by(OBJECTID)
merge 1:1 OBJECTID using "adm2_cleaned.dta", nogen
replace Docean=0 if Docean==.
// collapse on ADM level to correct for errors in shapefile
collapse (max) Docean d_miss_ADM2, by(ID_adm1 ID_adm2)
label var Docean "1 if district borders ocean,  intersect in arcgis"
// save observations with complete information for appending later
preserve
keep if d_miss_ADM2==0
save "temp_nomiss.dta", replace
restore
keep if d_miss_ADM2==1
drop Docean
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge m:1 ID_adm1 using "adm1oceans_cleaned.dta", nogen keep(3)
keep if d_miss_ADM2==1
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
save "adm2oceans_cleaned.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"

*** Load Capital Distance Data and ADM Centroids - ADM1 ***
use "$data\Capitals\dist_capital_GADM1.dta", clear
//attach variable of missing ADM regions
merge 1:1 ID_adm1 using "temp_miss_ADM2_idADM1.dta", nogen keep(1 3)
label var d_capital "Dummy if the ADM region hosts the country capital"
label var xcoord "x-coordinate of ADM region's centroid"
label var ycoord "y-coordinate of ADM region's centroid"
save "ADM1CAPITALCOORD.dta", replace

*** Load Capital Distance Data and ADM Centroids - ADM2 ***
use "$data\Capitals\dist_capital_GADM2.dta", clear
//attach variable of missing ADM regions
merge 1:1 ID_adm2 using "temp_miss_ADM2_idADM2.dta", nogen
// save observations with complete information for appending later
preserve
keep if d_miss_ADM2==0
save "temp_nomiss.dta", replace
restore
keep if d_miss_ADM2==1
drop dist_capital
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge m:1 ID_adm1 using "ADM1CAPITALCOORD.dta", nogen keep(3)
keep if d_miss_ADM2==1
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
save "ADM2CAPITALCOORD.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"


*** Load PRIO Controls - ADM1 ***
use "$data\GridCells\prio_GADM1_controls.dta", clear
drop if year>2014 | year<1995
//attach variable of missing ADM regions
merge m:1 ID_adm1 using "temp_miss_ADM2_idADM1.dta", nogen keep(1 3)
rename year transaction_year
save "ADM1PRIOCONTROLS.dta", replace

*** Load PRIO Controls - ADM2 ***
use "$data\GridCells\prio_GADM2_controls.dta", clear
drop if year>2014 | year<1995
//attach variable of missing ADM regions
merge m:1 ID_adm2 using "temp_miss_ADM2_idADM2.dta"
rename year transaction_year
// save observations with complete information for appending later
preserve
//Alternatively: keep if d_miss_ADM2==0 | transaction_year!=.
keep if _merge==3
drop _merge
save "temp_nomiss.dta", replace
restore

//add time dimension to ADM1 regions with missing ADM2 information
keep if _merge!=3
drop _merge transaction_year landarea ttime_mean excluded prec_gpcc temp droughtstart_spi droughtend_spi petroleum_s diamsec_s diamprim_s goldplacer_s goldsurface_s goldvein_s gem_s petroleum_y diamsec_y diamprim_y goldplacer_y goldsurface_y goldvein_y gem_y drug_y
gen transaction_year=1995
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm1 transaction_year
	egen ID_adm1_num = group(ID_adm1)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm1_num transaction_year
	//Expand dataset to match time period from 1995-2012
	tsappend, add(17)
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm1_num (transaction_year): carryforward ID_adm* d_miss_ADM2, replace 
	bysort ID_adm1_num (years_reverse): carryforward ID_adm* d_miss_ADM2, replace
	//Melvin H.L. Wong: 5. clean up
	drop years_reverse ID_adm1_num
	sort ID_adm1 transaction_year
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge 1:m ID_adm1 transaction_year using "ADM1PRIOCONTROLS.dta", nogen keep(3)
/*
Mismatch from master are countries without ADM1 regions. ok to drop.
Mismatch from using are the matched ones which will be appended later
*/
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
save "ADM2PRIOCONTROLS.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"


*** prepare population data - ADM1 ***
use "$data\ADM\1_1_1_R_pop_GADM1.dta", clear
rename year transaction_year
rename rid1 ID_adm1
rename isum_pop isum_pop_ADM1
keep transaction_year ID_adm1 isum_pop_ADM1
save "pop_adm1.dta", replace

*** prepare population data - ADM2 ***
use "$data\ADM\1_1_1_R_pop_GADM2.dta", clear
rename year transaction_year
rename rid2 ID_adm2
rename isum_pop isum_pop_ADM2
keep transaction_year ID_adm2 isum_pop_ADM2
//get indicator of missing regions
merge m:1 ID_adm2 using "temp_miss_ADM2_idADM2.dta"
// save observations with complete information for appending later
preserve
//Alternatively: keep if d_miss_ADM2==0 | transaction_year!=.
keep if _merge==3
drop _merge
save "temp_nomiss.dta", replace
restore
//add time dimension to ADM1 regions with missing ADM2 information
keep if _merge!=3
drop _merge transaction_year 
gen transaction_year=1995
	//Melvin H.L. Wong: 1. sort variables
	sort ID_adm1 transaction_year
	egen ID_adm1_num = group(ID_adm1)
	//Melvin H.L. Wong: 2. tsset Geounit Jahr
	tsset ID_adm1_num transaction_year
	//Expand dataset to match time period from 1995-2012
	tsappend, add(17)
	//Melvin H.L. Wong: 3. tsfill, full
	tsfill, full //fill out data gaps
	gen years_reverse =-transaction_year
	//Melvin H.L. Wong: 4. carryforward, countryname etc
	bysort ID_adm1_num (transaction_year): carryforward ID_adm* d_miss_ADM2, replace 
	bysort ID_adm1_num (years_reverse): carryforward ID_adm* d_miss_ADM2, replace
	//Melvin H.L. Wong: 5. clean up
	drop years_reverse ID_adm1_num
	sort ID_adm1 transaction_year
save "temp_missonly.dta", replace

//attach ADM1 data to missing ADM2 regions
use "temp_missonly.dta", clear
merge 1:m ID_adm1 transaction_year using "pop_adm1.dta", nogen keep(3)
/*
Mismatch from master are countries without ADM1 regions. ok to drop.
Mismatch from using are the matched ones which will be appended later
*/
save "temp_withmiss.dta", replace

// put together ADM2 and ADM1 if ADM2 is missing
use "temp_nomiss.dta", clear
append using "temp_withmiss.dta"
//fill out missing ADM2 observations with ADM1 Observation
replace isum_pop_ADM2=isum_pop_ADM1 if isum_pop_ADM2==.
drop isum_pop_ADM1
save "pop_adm2.dta", replace
// delete temp files
erase "temp_missonly.dta"
erase "temp_nomiss.dta"
erase "temp_withmiss.dta"


*** Create Nightlights variable - ADM1 ***
import delimited using "$data\nightlights\adm1_sumlight.csv", clear
drop v1
rename sumlightmeansum lights_sum
rename year transaction_year
rename id_adm1 ID_adm1
replace lights_sum="." if lights_sum=="NA"
destring lights_sum, replace
* merge popdata to create lights üer capita
merge 1:1 ID_adm1 transaction_year using "pop_adm1.dta"
keep if transaction_year>=1992 & transaction_year<=2013
tab _merge
gen lights_pc = lights_sum/ isum_pop_ADM1
drop _merge isum_pop_ADM1
label var lights_sum "Mean of sum of regional nightlights"
save "lights_adm1.dta", replace

*** Create Nightlights variable - ADM2 ***
import delimited using "$data\nightlights\adm2_sumlight.csv", clear
drop v1
rename sumlightmeansum lights_sum
rename year transaction_year
rename id_adm2 ID_adm2
replace lights_sum="." if lights_sum=="NA"
destring lights_sum, replace
* merge popdata to create lights üer capita
merge 1:1 ID_adm2 transaction_year using "pop_adm2.dta"
keep if transaction_year>=1992 & transaction_year<=2013
tab _merge
gen lights_pc = lights_sum/ isum_pop_ADM2
drop _merge isum_pop_ADM2
label var lights_pc "lights_sum divided by isum_pop"
save "lights_adm2.dta", replace

*** Get country list where population <1 million ***
import delimited using "$data\ADM\gadm28_adm0_valid.csv", clear
rename name_engli ADM0
rename iso ISO3
rename id_0 id_0n100
keep id_0n100 ISO3 ADM0 pop2000 unregion1 unregion2
gen d_smallcountry = 0
replace d_smallcountry=1 if pop2000<1000000
label var d_smallcountry "Dummy if recipient country has a population below 1 Mio. in 2000" 
save "d_pop.dta", replace

*** Load data for World Bank IDA Position & IBRD Equity to Loan Ratio IV IV ***
import excel using "$data\Aid\IDA_IBRD_Liquidity.xlsx", firstrow clear
destring IDA_position IBRD_EtL_Ratio, replace
label var IDA_position "IDA funding position from Dreher at al 2017"
label var IBRD_EtL_Ratio "IBRD equity to loan ratio from Dreher et al 2017"
keep if year>1994 & year<2015
rename year transaction_year
save "alt_iv.dta", replace

/*
*** Load PRIO Conflict data on ADM1 Level ***
use "$data\Conflict Data\1_1_prioConflict_GADM1_newId.dta", clear
keep rid1 year conf onset conf_L1 conf_L2 name_0c75 isoc3
renvars rid year name_0c75 isoc3 / ID_adm1 transaction_year ADM0 ISO3
drop if transaction_year>2012 | transaction_year<1995
save "ADM1PRIO.dta", replace
*/

*** Prepare Chinese steel production for IV ***
import excel using "$data\Steel Production\Chinese Steel 2000-2012.xlsx", firstrow clear
save "steel_iv.dta", replace 

*** Prepare Chinese FDI for IV Placebo Test ***
import excel using "$data\UNCTAD\us_fdiflowsstock_97701314821841.xlsx", firstrow clear
label var chinese_fdi "Value of Chinese Annual Outward FDI flows, US Dollars at current prices in Mio"
destring transaction_year, replace
save "chinese_fdi.dta", replace

*** Prepare Chinese Exports for IV Placebo Test ***
import excel using "$data\DOTS\Trade_of_Goods_13082018.xlsx", firstrow clear
rename exports chinese_exports
label var chinese_exports "Value of Chinese Exports, Free on board (FOB), US Dollars"
destring transaction_year, replace
save "chinese_exports.dta", replace


*** Prepare Chinese Imports for IV Placebo Test ***
import excel using "$data\DOTS\Trade_of_Goods_Global_20210907.xlsx", firstrow clear sheet("Exports, FOB")
keep if Country=="China, P.R.: Mainland"
reshape long Y, i(Country) j(transaction_year)
keep transaction_year Y
rename Y chinese_imports
label var chinese_imports "Value of Chinese Imports, Cost Insurance Freight (CIF), US Dollars"
destring transaction_year, replace
save "chinese_imports.dta", replace

*** Prepare Global FDI for IV Placebo Test ***
import excel using "$data\FDI (WDI)\FDI.xlsx", firstrow clear
keep if CountryName=="World"
reshape long Y, i(CountryName) j(transaction_year)
keep transaction_year Y
rename Y global_fdi
label var global_fdi "Value of Global FDI"
destring transaction_year, replace
save "global_fdi.dta", replace

*** Prepare Global Trade for IV Placebo Test ***
import excel using "$data\DOTS\Trade_of_Goods_Global_20210907.xlsx", firstrow clear sheet("Exports, FOB")
keep if Country=="World"
reshape long Y, i(Country) j(transaction_year)
keep transaction_year Y
rename Y global_exports
label var global_exports "Value of Global Exports, Free on board (FOB), US Dollars"
destring transaction_year, replace
save "global_exports.dta", replace


import excel using "$data\DOTS\Trade_of_Goods_Global_20210907.xlsx", firstrow clear sheet("Imports, CIF")
keep if Country=="World"
reshape long Y, i(Country) j(transaction_year)
keep transaction_year Y
rename Y global_imports
label var global_imports "Value of Global Imports, Cost Insurance Freight (CIF), US Dollars"
destring transaction_year, replace
save "global_imports.dta", replace


*xxxxxxxxxx Melvin 14.11.2017: Clean up folder and erase temp file


erase alg.dta
erase ancillary.dta
erase adm1_v.dta
//erase gadm1.dta //still needed for merging
//erase gadm2.dta //still needed for merging
erase gadm1_ids.dta
erase gadm2_ids.dta
erase country_pop.dta
erase OF.dta

erase IDA_disbursement.dta
erase IBRD_disbursement.dta
erase China_disbursement.dta
erase china_temp1.dta
erase India_disbursement.dta

erase ACD2EPR.dta
erase brdprec1234.dta

*******************************
* Prepare Bjornskov Rode Data *
*******************************

import excel using "$rawdata\Bjornskov & Rode\Bjørnskov-Rode-integrated-dataset-v2.2.xlsx", firstrow clear
rename countryisocode ISO3
rename year transaction_year
rename Democracy democracy_br

replace ISO3="COD" if ISO3=="ZAR" // replace ISO3 for Congo to the standard used in GADM file
keep ISO3 transaction_year democracy_br
save "$data\Bjornskov & Rode\Bjørnskov-Rode-integrated-dataset-v2.2.dta", replace

*******************
* Prepare Xpolity *
*******************
import delim using "$rawdata\Xpolity\xpolity_1800-2013.csv", clear
keep year scode democ xpolity country
rename year transaction_year
rename country ADM0
rename scode ISO3
replace ADM0="Congo, Dem. Rep." if ADM0=="Congo Kinshasa"
replace ADM0="Congo, Rep." if ADM0=="Congo Brazzaville"
replace ADM0="Côte d'Ivoire" if ADM0=="Ivory Coast"
replace ADM0="Yemen, North" if ADM0=="Yemen North"
replace ADM0="Yemen, South" if ADM0=="Yemen South"
duplicates drop ADM0 trans, force
label var xpolity "Polity 2 index corrected for endogeneous part"
save "$data\Xpolity\Xpolity.dta", replace


*****************
* Press Freedom *
*****************
import excel using "$rawdata\Freedom House\fotp_1995_2014.xlsx", firstrow clear
reshape long Total_Score_, i(A) j(transaction_year)
replace Total_Score_="." if Total_Score=="-"
destring Total_Score_, replace
rename Total_Score_ PressFreedom_Score
label var PressFreedom_Score "0(Min) to 100 (Max). Source: Freedom House"
rename A ADM0
* Unify country names
replace ADM0="Gambia" if ADM0=="The Gambia"
replace ADM0="Congo, Dem. Rep." if ADM0=="Congo (Kinshasa)"
replace ADM0="Congo, Rep." if ADM0=="Congo (Brazzaville)"
replace ADM0="Korea, Democratic Republic of" if ADM0=="North Korea"


save "$data\FreedomHouse\press_freedom.dta", replace


****************
* Transparency *
****************
use "$rawdata\HRV_Transparency\HRV2013.dta", clear
keep year transparencyindex country wdicode
rename country ADM0
rename wdicode ISO3
rename year transaction_year
* Unify country names
replace ADM0="Côte d'Ivoire" if ADM0=="Cote d'Ivoire"
replace ADM0="Gambia" if ADM0=="Gambia, The"
replace ADM0="Egypt" if ADM0=="Egypt, Arab Rep."
replace ADM0="Syria" if ADM0=="Syrian Arab Republic"
save "$data\HRV_Transparency\transparency.dta", replace


*********************
* Add Global Trends *
*********************
* XXX Lennart 09.01.2019: These three temp files need to be added to data creation file, when we review it next time
* Import MSCI World (Source: MSCI/ Wikipedia)
* Import S&P500 (Source: Yahoo Finance)
import excel using "$rawdata\Global Trends\MSCI_Returns.xlsx", clear firstrow
tempfile MSCI
save `MSCI', replace

* Temperature Anomaly Data from https://climate.nasa.gov/vital-signs/global-temperature/
import excel  using "$rawdata\Global Temperature\graph.xlsx", clear firstrow
rename Year transaction_year
label var No_Smoothing "Temperature anomaly without smoothing"
rename No_Smoothing global_temperature_anonmaly
keep global_temperature_anonmaly transaction_year
tempfile temperature
save `temperature', replace

* Import S&P500 (Source: Yahoo Finance)
import excel using "$rawdata\Global Trends\S&P500.xlsx", clear firstrow
gen transaction_year=year(Date )
* drop 1994 observation
drop if transaction_year==1994
destring Open, replace
collapse (mean) Open, by(transaction_year)
rename Open snp500
label var snp500 "S&P500 Index (opening)"
tempfile stock_index
save `stock_index', replace

* Import Commodity Price Index (Source: FRED St Louis)
import excel using "$rawdata\Global Trends\FRED_Commodity_Index.xlsx", clear firstrow
gen transaction_year=year( observation_date )
collapse (mean) PALLFNFINDEXQ, by(transaction_year)
rename PALLFNFINDEXQ commodity_index
label var commodity_index "FRED St. Louis Commodity Price Index"
tempfile com_index
save `com_index', replace

* Import World GDP Data (Source: WDI)
import excel using "$rawdata\Global Trends\GDP_PPP_Curr_Dollar.xlsx", clear firstrow
keep if SeriesName=="GDP, PPP (current international $)"
keep if CountryName=="World"
reshape long y, i(CountryName) j(transaction_year)
rename y GDP_global
label var GDP_global "World GDP"
destring GDP_global, replace
keep GDP_global transaction_year
tempfile gdp_glob
save `gdp_glob', replace

* Import FDI (Source:WDI)
import excel using "$rawdata\Global Trends\WDI_FDI.xlsx", clear firstrow
keep if CountryName=="World"
drop CountryCode IndicatorName IndicatorCode AG
reshape long y, i(CountryName) j(transaction_year)
rename y FDI_global
label var FDI_global "Annual FDI outflows"
tempfile fdi_glob
save  `fdi_glob', replace

* Import Trade (Source: UN Comtrade)
import delimited using "$rawdata\Global Trends\comtrade.csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t1
save `t1', replace
import delimited using "$rawdata\Global Trends\comtrade(1).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t2
save `t2', replace
import delimited using "$rawdata\Global Trends\comtrade(2).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t3
save `t3', replace
import delimited using "$rawdata\Global Trends\comtrade(3).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t4
save `t4', replace
import delimited using "$rawdata\Global Trends\comtrade(4).csv", clear  delim(",")
* Only use imports as generally better documented
keep if tradeflow=="Import"
* keep only relevant variables
keep year tradevalueus reporter
collapse (sum) tradevalueus, by(year)
renvars year tradevalueus / transaction_year Imports_global
tempfile t5
save `t5', replace
append using `t1'
append using `t2'
append using `t3'
append using `t4'
tempfile trade_glob
save `trade_glob', replace

* Import global battle related deaths
import excel using "$rawdata\Global Trends\ucdp-brd-conf-181.xlsx", clear firstrow
collapse (sum) bdbest, by(year)
renvars bdbest year / brd_global transaction_year
tempfile brd_glob
save `brd_glob', replace

**************************
* Import birth regions   *
**************************
import excel using "$rawdata\birthregions\PLAD_19.04.2021_ALL.xlsx", firstrow clear

gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0 r id_1)
egen ID_adm2 = concat(c id_0 r id_1 r id_2)
drop c r
replace endyear="2021" if endyear=="ongoing"
destring endyear, replace

drop if ID_adm1=="c.r." | ID_adm1=="cr"
gen tenure=endyear-startyear+1


* Expand dataset
sort plad_id ID_adm1 
expand tenure
sort plad_id ID_adm1 
bys plad_id ID_adm1 : gen n=_n
gen year=startyear+n-1

* Drop foreign leaders
destring foreign_leader, replace
drop if  foreign_leader==1

* Drop part-year leaders
gen startmonth=substr(startdate, 6,2)
gen endmonth=substr(startdate, 6,2)
destring startmonth endmonth, replace

drop if startmonth>=7 & n==1
drop if endmonth<=6 & n==1

keep ID_adm1 year
renvars year / transaction_year

* Drop redundant years
drop if transaction_year>2014
drop if transaction_year<1995

gen birthregion=1
label var birthregion "Dummy if a region was leader birht region (Dreher et al. 2021)"
save "$data\BirthRegions\br.dta", replace


* Merge additional data
clear all
use "$data\Merge\IDA_Aid_GED_ADM1.dta", clear
* Drop redundant years
drop if transaction_year>2014
drop if transaction_year<1995
merge m:1 transaction_year using `trade_glob', nogen keep(1 3)
merge m:1 transaction_year using `fdi_glob', nogen keep(1 3)
merge m:1 transaction_year using `gdp_glob', nogen keep(1 3)
merge m:1 transaction_year using `brd_glob', nogen keep(1 3)
merge m:1 transaction_year using `com_index', nogen keep(1 3)
merge m:1 transaction_year using `stock_index', nogen keep(1 3)
merge m:1 transaction_year using `MSCI', nogen keep(1 3)
merge m:1 transaction_year using `temperature', nogen keep(1 3)
merge m:1 transaction_year ADM0 using "$data\Xpolity\Xpolity.dta", keep(1 3) nogen
merge m:1 transaction_year ADM0 using "$data\FreedomHouse\press_freedom.dta", keep(1 3) nogen
 merge m:1 transaction_year ADM0 using  "$data\HRV_Transparency\transparency.dta", keep(1 3) nogen
 merge m:1 transaction_year ID_adm1 using "$data\BirthRegions\br.dta", keep(1 3) nogen
 * replace birthregion = 0 for all cases where it is not 1
 replace birthregion=0 if birthregion==.
 label var d_miss_ADM2 "Dummy if ADM2 region is missing"
 label var lights_sum "Mean of sum of regional nightlights"
save "$data\Merge\IDA_Aid_GED_ADM1.dta", replace


use "$data\Merge\IDA_Aid_GED_ADM2.dta", clear
merge m:1 transaction_year ADM0 using "$data\Xpolity\Xpolity.dta", keep(1 3) nogen
merge m:1 transaction_year ADM0 using "$data\FreedomHouse\press_freedom.dta", keep(1 3) nogen
 merge m:1 transaction_year ADM0 using  "$data\HRV_Transparency\transparency.dta", keep(1 3) nogen
  label var d_miss_ADM2 "Dummy if ADM2 region is missing"
save "$data\Merge\IDA_Aid_GED_ADM2.dta", replace


*************************************************************
* Create Data for Descriptive Statistics on Precision Codes *
*************************************************************
/*
*/
set matsize 11000
global cwd "$data\Aid\2017_11_14_WB"
cd "$cwd"

import delimited using "$data\ADM\gadm28_adm0_valid.csv", clear
rename name_engli ADM0
rename iso ISO3
rename id_0 id_0n100
keep unregion2 ISO3
duplicates drop ISO3 unregion2, force
tempfile unregion2
save `unregion2', replace


forvalues i=1995(1)2012 {
import excel "$data\Aid\IDA_IBRD_transactions.xlsx", firstrow clear
/*XXXXXX Melvin 05.01.2018 @Kai disbursements are sometimes negative. Should we drop them to avoid miscounting locations?
I checked the WorldBank transaction file for Project P000603

Jun, 2009	IDAN0310	Fees			37.788,94
Jun, 2009	IDAN0310	Repayment		78.302,85
Sep, 2009	IDAN0310	Disbursement	-92.437,99
Okt, 2009	IDAN0310	Disbursement	-39.091,19
Nov, 2009	IDAN0310	Fees			29.005,87
Nov, 2009	IDAN0310	Repayment		81.084,39
Nov, 2009	IDAN0310	Cancellations	170.268,45
*Drop repayments
*/ 
drop if transactionvalue<0 //4346 out of 149848 transaction coded as missing (about 3%)

renvars projectid year transactionvalue/  project_id transaction_year transaction_value
keep if financier=="IDA"
keep project_id transaction_year transaction_value
drop if transaction_year!=`i'
egen transaction_value_tot=total(transaction_value), by( project_id)  
label variable transaction_value_tot "Total value per project per year"
* Generate count variable for number of positive project disbursements
gen count=1 if transaction_value>0  
egen Disbursementcount=total(count), by(project_id transaction_year)
label var Disbursementcount "Sum of yearly positive disbursements within project" 
drop transaction_value
/*XXXXXX Melvin 29.12.2017: @Lennart, do you happen to know why some projects 
are not matched? e.g. project P008275 in the year1995 with 6 disbursments.
*/
collapse (mean) transaction_value_tot Disbursementcount, by(project_id transaction_year)

merge 1:m project_id using "C:\Users\lkaplan\Dropbox\Geocoded Aid and Conflict\Data\gridcell_dataset\2_output\2_1_prepAid\alg.dta", nogen keep(3 1)

* XXXXXXXXX Lennart 04.01.2018: The part below is to a large extent recoded. Reviewing is, hence, necessary. Thanks!
/* 
Now, allocate aid flows that do not correspond to a certain administrative area in the following way
If there are 
5 locations, where 3 are geocoded on precision level 1-3 and 1 is geocoded on precision level 4, 1 is coded less precisely
Projectsum is X
Take 4/5*X as the amount to be totally allocated, thus 1/5X is lost in the data
Location weighted
- Each of the 3 ADM2 regions coded with precision 1-3  gets 1/5*X
- The ADM1 region coded with precision 4 gets 1/5*X and this is then distributed equally amongst the corresponding ADM2 regions
Pop weighted
- Each of the 3 ADM2 regions coded with precision 1-3 gets Pop(i)/(Sum Pop)*4/5*X 
- The ADM1 region coded with precision 4 gets Pop(i)/(Sum Pop)*4/5*X and this is then distributed equally amongst the corresponding ADM2 regions

An analogous approach is applied to the disbursementcount / transaction count (e.g., the number of projectwise transactions from the IDA account to the project). Although the interpretation might not be intuitive (e.g., a project
with 2 transactions might have 10 locations, so we have a disbursementcount of 0.2), it is most closely comparable to the USD amounts. Alternatively, we could think about numbers of active projects.
Example code
gen temp_totlocation =																//Number of locations of entire project
gen temp_totcoded = 																//Number of locations that are precisely coded (higher than precision level 5)
gen temp_totcoded4 = 																//Number of locations that are precisely coded (precision level 4)
gen temp_totcoded13 = 																//Number of locations that are precisely coded (higher than precision level 4)
gen temp_projsum = temp_totcoded/temp_totlocation*transaction_value_tot				//Total amount of project amount to be allocated to different regions
*/

//temp_totlocation: Number of locations with positive project disbursements for entire project year
gen count=1
egen temp_totlocation=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<=4
egen temp_totcoded=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are coded with precision 4
gen count=1 if precision_N100==4
egen temp_totcoded4=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<4
egen temp_totcoded13=total(count), by(project_id transaction_year)
drop count

//temp_projsum: Total amount of project amount to be allocated to different regions after discounting for information loss
rename transaction_value_tot temp_value																
gen transaction_value_tot= temp_totcoded/temp_totlocation*temp_value		

save `i'.dta, replace 
}

* Put yearly disbursements together
clear
use 1995.dta, clear
forvalues i=1996(1)2012 {
append using `i'.dta
erase `i'.dta
}
erase 1995.dta
merge m:1 ISO using `unregion2', nogen keep(1 3) 

 by project_loC, sort: gen nvals = _n == 1
 * Gen Disbursement Amounts
egen total_aid=total(transaction_value_tot)  if unregion2=="Africa"
egen total_aid1234=total(transaction_value_tot)  if precision<=4 & unregion2=="Africa"
egen total_aid123=total(transaction_value_tot)  if precision<4 & unregion2=="Africa"
egen total_aid12=total(transaction_value_tot)  if precision<3 & unregion2=="Africa"
egen total_aid1=total(transaction_value_tot)  if precision==1 & unregion2=="Africa"
egen total_aid2=total(transaction_value_tot) if precision==2 & unregion2=="Africa"
egen total_aid3=total(transaction_value_tot)  if precision==3 & unregion2=="Africa"
egen total_aid4=total(transaction_value_tot)  if precision==4 & unregion2=="Africa"
egen total_aid5678=total(transaction_value_tot)  if precision>4 & unregion2=="Africa"
* Gen Location Counts
egen total=total(nvals)  if unregion2=="Africa"
egen total1234=total(nvals) if precision<=4 & unregion2=="Africa"
egen total123=total(nvals) if precision<4 & unregion2=="Africa"
egen total12=total(nvals) if precision<3 & unregion2=="Africa"
egen total1=total(nvals) if precision==1 & unregion2=="Africa"
egen total2=total(nvals) if precision==2 & unregion2=="Africa"
egen total3=total(nvals) if precision==3 & unregion2=="Africa"
egen total4=total(nvals) if precision==4 & unregion2=="Africa"
egen total5678=total(nvals) if precision>4 & unregion2=="Africa"



* Create Project Durations to get an understanding of the length of an average project
bysort project_id: egen begin=min(transaction_year)
bysort project_id: egen end=max(transaction_year)
gen duration=end-begin
sum duration
drop begin end duration 

keep total*
gen donor="World Bank"
set scheme s2mono
label var total "Precision 1-8"
label var total1234 "Precision 1-4"
label var total123 "Precision 1-3"
label var total12 "Precision 1-2"
label var total12 "Precision 1"
* Replace Missings to have a complete dataset after duplicates drop
sort total 
replace total1 = total1[_n-1] if missing(total1) 
replace total2 = total2[_n-1] if missing(total2) 
replace total3 = total3[_n-1] if missing(total3) 
replace total4 = total4[_n-1] if missing(total4) 
replace total12 = total12[_n-1] if missing(total12) 
replace total123 = total123[_n-1] if missing(total123) 
replace total1234 = total1234[_n-1] if missing(total1234) 
replace total5678=total5678[_n-1] if missing(total5678)
sort total_aid1
replace total_aid1 = total_aid1[_n-1] if missing(total_aid1) 
replace total_aid2 = total_aid2[_n-1] if missing(total_aid2) 
replace total_aid3 = total_aid3[_n-1] if missing(total_aid3) 
replace total_aid4 = total_aid4[_n-1] if missing(total_aid4) 
replace total_aid12 = total_aid12[_n-1] if missing(total_aid12) 
replace total_aid123 = total_aid123[_n-1] if missing(total_aid123) 
replace total_aid1234 = total_aid1234[_n-1] if missing(total_aid1234) 
replace total_aid5678=total_aid5678[_n-1] if missing(total_aid5678)
keep if !missing(total1, total2, total3, total4 , total, total12, total123, total1234, total5678, total1, total2, total3, total4 , total_aid, total_aid3, total_aid4, total_aid12, total_aid123, total_aid1234, total_aid5678)



duplicates drop donor, force




tempfile wb
save `wb', replace   
   
   
   
   
* China

***
* Load Project Data
********************
import excel using "$data\Aid_China\aiddata_china_1_1_1.xlsx", sheet("1) Official Finance") firstrow clear
rename year transaction_year

* Create Project Durations to get a feeling of the length of an average project
gen begin=year( start_actual )
gen end=year( end_actual )
gen duration=end-begin
sum duration
drop begin end duration

save OF.dta, replace

*********************
* Load GADM-Aid Data
*********************
* Load ADM1 Data for the cases, where no ADM2 shapefile existed
import delim using "$data\Aid_China\spatial_join_adm1_chinese_aid.csv", clear
//generate local of ChinaAid countries to drop non ChinaAid countries at a later stage
levelsof id_0n100, local(ChinaAidCountries)

* Keep ID1 Identifier to merge these into ADM2 Data
keep target_fidn100 id_0n100 id_1n100 join_fidn100 isoc3 name_0c75 name_1c75
save adm1_v.dta, replace

**********************************
* Load ADM2 data
import delim using "$data\Aid_China\spatial_join_adm2_chinese_aid.csv", clear
drop id_0n100 isoc3 name_0c75 id_1n100 name_1c75  //clear entries from errors if no ADM2 regions identified; otherwise missing id_0 and id_1 entries

* Merge with the ADM1 Identifiers as there are some ADM2 regions missing and in this case the ADM1 region is also not coded (This issue seems persistent for two observations, "which fall into the sea")
merge 1:1 target_fidn100 using adm1_v.dta, nogen keep(1 3)

*create dummy variable indicating if a GADM2 region is missing, thus have been replaced by GADM1 region
gen byte d_miss_ADM2=(id_2n100==0 & id_1n100!=0)

renvars project_idn100 year_n100/  project_id transaction_year

//Manuall code projects that are identified in the ocean, but are acutally precisely coded, which is >=precision4 (See protocol from 03-09-2017 last pages)
replace id_0n100= 87 if project_id==1468 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 5 if project_id==1468 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 72 if project_id==1468 & join_fidn100==-1 & id_2n100==0
replace isoc3= "GHA" if project_id==1468 & join_fidn100==-1
replace name_0c75= "Ghana" if project_id==1468 & join_fidn100==-1
replace name_1c75= "Greater Accra" if project_id==1468 & join_fidn100==-1
replace name_2c75= "Dangbe West" if project_id==1468 & join_fidn100==-1


replace id_0n100= 203 if project_id==2081 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 17 if project_id==2081 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 0 if project_id==2081 & join_fidn100==-1 & id_2n100==0
replace isoc3= "SYC" if project_id==2081 & join_fidn100==-1
replace name_0c75= "Seychelles" if project_id==2081 & join_fidn100==-1
replace name_1c75= "Les Mamelles" if project_id==2081 & join_fidn100==-1 & name_1c75==""
replace d_miss_ADM2=1 if project_id==2081

replace id_0n100= 203 if project_id==1161 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 22 if project_id==1161 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 0 if project_id==1161 & join_fidn100==-1 & id_2n100==0
replace isoc3= "SYC" if project_id==1161 & join_fidn100==-1
replace name_0c75= "Seychelles" if project_id==1161 & join_fidn100==-1
replace name_1c75= "Pointe Larue" if project_id==1161 & join_fidn100==-1 & name_1c75=="" & precision_n100==4
replace d_miss_ADM2=1 if project_id==1161

replace id_0n100= 118 if project_id==1291 & join_fidn100==-1 & id_0n100==0
replace id_1n100= 28 if project_id==1291 & join_fidn100==-1 & id_1n100==0
replace id_2n100= 182 if project_id==1291 & join_fidn100==-1 & id_2n100==0 // Melvin 18.01.2018: Manually looked up geocode. Likely to belong to Mombasa-Mvita region
replace isoc3= "KEN" if project_id==1291 & join_fidn100==-1
replace name_0c75= "Kenya" if project_id==1291 & join_fidn100==-1
replace name_1c75= "Mombasa" if project_id==1291 & join_fidn100==-1
replace name_2c75= "Mvita" if project_id==1291 & join_fidn100==-1


* Generate unique identifier for each ADM region:
gen c = "c"
gen r = "r"
egen ID_adm1 = concat(c id_0n100 r id_1n100)
egen ID_adm2 = concat(c id_0n100 r id_1n100 r id_2n100)
drop c r  
label var ID_adm1 "Unique identifier for ADM1 region"
label var ID_adm2 "Unique identifier for ADM2 region"

* Merge the location data (master) with actual flow data (using)
merge m:1 project_id transaction_year using OF.dta, nogen //no mismatch from master

* Clean Data 
keep id_2n100 id_1n100 id_0n100 isoc3 precision_n100 d_miss_ADM2 name_0c75 name_1c75 name_2c75 project_id transaction_year titlec254 year_uncerc254 crs_sectorn100 crs_sect_1c254 sector_comc254 statusc254 status_codn100 flowc254 donor_ag_1n100 verifiedc254 flow_classc254 flow_cla_1n100 intentc254 activec254 start_actuc254 start_planc254 end_actualc254 end_plannec254 loan_typec254 line_of_crc254 is_cofinanc254 is_ground_c254 is_offician100 ID_adm1 ID_adm2 flow sources_count amount currency deflators_used exchange_rates_used usd_defl usd_current verified_cn100
* Drop observations which only signify a pledge, but no full-fetched disbursement
drop if statusc254=="Pipeline: Pledge"

* Keep only Official Development Assistance (ODA) & Other Official Finance (OOF) flows
keep if  flow_classc254=="ODA-like" //| flow_classc254=="Vague (Official Finance)" | flow_classc254=="OOF-like"

* rename data to fit program algorithm
renvars isoc3 id_0n100 id_1n100 id_2n100 precision_n100 name_0c75 name_1c75 name_2c75 / ISO3 ID_0 ID_1 ID_2 precision_N100 ADM0 ADM1 ADM2

save china_temp1.dta, replace

****next step: discount aid
use china_temp1.dta, clear
rename usd_current transaction_value_tot
drop if transaction_value==.  //963 out of 3117 obs missing (30%)

* Generate count variable for number of positive project disbursements
gen count=1 if transaction_value_tot>0
egen Disbursementcount=total(count), by(project_id transaction_year)
label var Disbursementcount "Sum of yearly positive disbursements within project" 
drop count


//temp_totlocation: Number of locations with positive project disbursements for entire project year
gen count=1
egen temp_totlocation=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<=4
egen temp_totcoded=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are coded with precision 4
gen count=1 if precision_N100==4
egen temp_totcoded4=total(count), by(project_id transaction_year)
drop count

//temp_totcoded: Number of locations that are precisely coded (higher than precision level 4)
gen count=1 if precision_N100<4
egen temp_totcoded13=total(count), by(project_id transaction_year)
drop count

//temp_projsum: Total amount of project amount to be allocated to different regions after discounting for information loss
rename transaction_value_tot temp_value																
gen transaction_value_tot= temp_totcoded/temp_totlocation*temp_value

merge m:1 ISO using `unregion2', nogen keep(1 3) 


 * Gen Disbursement Amounts
egen total_aid=total(transaction_value_tot)  if unregion2=="Africa"
egen total_aid1234=total(transaction_value_tot)  if precision<=4 & unregion2=="Africa"
egen total_aid123=total(transaction_value_tot)  if precision<4 & unregion2=="Africa"
egen total_aid12=total(transaction_value_tot)  if precision<3 & unregion2=="Africa"
egen total_aid1=total(transaction_value_tot)  if precision==1 & unregion2=="Africa"
egen total_aid2=total(transaction_value_tot) if precision==2 & unregion2=="Africa"
egen total_aid3=total(transaction_value_tot)  if precision==3 & unregion2=="Africa"
egen total_aid4=total(transaction_value_tot)  if precision==4 & unregion2=="Africa"
egen total_aid5678=total(transaction_value_tot)  if precision>4 & unregion2=="Africa"
* Gen Location Counts by precision
gen nvals = 1
egen total=total(nvals)  if unregion2=="Africa"
egen total1234=total(nvals) if precision<=4 & unregion2=="Africa"
egen total123=total(nvals) if precision<4 & unregion2=="Africa"
egen total12=total(nvals) if precision<3 & unregion2=="Africa"
egen total1=total(nvals) if precision<2 & unregion2=="Africa"
egen total2=total(nvals) if precision==2 & unregion2=="Africa"
egen total3=total(nvals) if precision==3 & unregion2=="Africa"
egen total4=total(nvals) if precision==4 & unregion2=="Africa"
egen total5678=total(nvals) if precision>4 & unregion2=="Africa"

keep total*
gen donor="China"
label var total "Precision 1-8"
label var total1234 "Precision 1-4"
label var total123 "Precision 1-3"
label var total12 "Precision 1-2"
label var total12 "Precision 1"
* Replace missings to have a complete dataset after dropping missings
sort total 
replace total1 = total1[_n-1] if missing(total1) 
replace total2 = total2[_n-1] if missing(total2) 
replace total3 = total3[_n-1] if missing(total3) 
replace total4 = total4[_n-1] if missing(total4) 
replace total12 = total12[_n-1] if missing(total12) 
replace total123 = total123[_n-1] if missing(total123) 
replace total1234 = total1234[_n-1] if missing(total1234) 
replace total5678=total5678[_n-1] if missing(total5678)
sort total_aid 
replace total_aid1 = total_aid1[_n-1] if missing(total_aid1) 
replace total_aid2 = total_aid2[_n-1] if missing(total_aid2) 
replace total_aid3 = total_aid3[_n-1] if missing(total_aid3) 
replace total_aid4 = total_aid4[_n-1] if missing(total_aid4) 
replace total_aid12 = total_aid12[_n-1] if missing(total_aid12) 
replace total_aid123 = total_aid123[_n-1] if missing(total_aid123) 
replace total_aid1234 = total_aid1234[_n-1] if missing(total_aid1234) 
replace total_aid5678=total_aid5678[_n-1] if missing(total_aid5678)
keep if !missing(total1, total2, total3, total4 , total, total12, total123, total1234, total5678, total1, total2, total3, total4 , total_aid, total_aid12, total_aid123, total_aid1234, total_aid5678)
duplicates drop donor, force

append using `wb'


* Label variables
label var donor "Donor of ODA"
label var total`i' "Location count of all projects"
label var total_aid`i' "Aid amount of all projects"

foreach i in 1234 123 12 1 2 3 4 5678  `""' {
label var total`i' "Location count of projects with precision codes `i'"
label var total_aid`i' "Aid amount of projects with precision codes `i'"
}

save "$data/aid_by_precision_code.dta", replace


